Next changeset 1:ff0b814c1320 (2014-11-21) |
Commit message:
Uploaded v0.1.2 (previously only on the Test Tool Shed) |
added:
test-data/four_human_proteins.fasta test-data/k12_edited_proteins.fasta test-data/k12_ten_proteins.fasta test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular test-data/rbh_blastp_k12.tabular test-data/rbh_blastp_k12_self.tabular test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular test-data/rbh_none.tabular test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular test-data/rhodopsin_nucs.fasta test-data/rhodopsin_proteins.fasta test-data/three_human_mRNA.fasta tools/blast_rbh/README.rst tools/blast_rbh/blast_rbh.py tools/blast_rbh/blast_rbh.xml tools/blast_rbh/tool_dependencies.xml |
b |
diff -r 000000000000 -r b828ca44a313 test-data/four_human_proteins.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins.fasta Mon Aug 04 08:13:39 2014 -0400 |
b |
@@ -0,0 +1,61 @@ +>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 +MHPAVFLSLPDLRCSLLLLVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF +SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK +REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER +VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK +CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD +CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF +HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL +>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 +MKKFSRMPKSEGGSGGGAAGGGAGGAGAGAGCGSGGSSVGVRVFAVGRHQVTLEESLAEG +GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS +DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD +LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG +KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP +DPEHRPDIFQVSYFAFKFAKKDCPVSNINNSSIPSALPEPMTASEAAARKSQIKARITDT +IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE +ILLGQGPPQQPPQQHRVLQQLQQGDWRLQQLHLQHRHPHQQQQQQQQQQQQQQQQQQQQQ +QQQQQQHHHHHHHHLLQDAYMQQYQHATQQQQMLQQQFLMHSVYQPQPSASQYPTMMPQY +QQAFFQQQMLAQHQPSQQQASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV +ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTEEELLDREFDLLRSNRLEERASSD +KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD +QRTGKKTSVQGQVQKGNDESESDFESDPPSPKSSEEEEQDDEEVLQGEQGDFNDDDTEPE +NLGHRPLLMDSEDEEEEEKHSSDSDYEQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA +QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK +APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD +EITGSQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHHGTPTSTKKTLKPTYRTPERARR +HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS +WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ +SQQSQPVELDPFGAAPFPSKQ +>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 +MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL +QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL +VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE +ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL +GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG +CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC +TVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETL +EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE +RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ +NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS +DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE +RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL +KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAF +PNTSSTSVPTSPEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV +SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV +SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG +PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR +EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG +FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA +AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV +RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN +CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPESEELEME +FEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN +PS +>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY +VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG +GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP +EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES +ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI +YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA |
b |
diff -r 000000000000 -r b828ca44a313 test-data/k12_edited_proteins.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/k12_edited_proteins.fasta Mon Aug 04 08:13:39 2014 -0400 |
[ |
@@ -0,0 +1,69 @@ +>gi|16127995|ref|NP_414542.1| thr operon leader peptide [Escherichia coli str. K-12 substr. MG1655] +MKRISTTITTTITITTGNGAG +>gi|16127996|ref|NP_414543.1| fused aspartokinase I and homoserine dehydrogenase I [Escherichia coli str. K-12 substr. MG1655] +MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERI +FAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEA +RGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYS +AAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPC +LIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLIT +QSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL +ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSW +LKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAV +ADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELM +KFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIE +IEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFK +VKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV +>gi|16127997|ref|NP_414544.1| homoserine kinase [Escherichia coli str. K-12 substr. MG1655] +MVKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAETFSLNNLGRFADKLPSEPRENIVYQCWE +RFCQELGKQIPVAMTLEKNMPIGSGLGSSACSVVAALMAMNEHCGKPLNDTRLLALMGELEGRISGSIHY +DNVAPCFLGGMQLMIEENDIISQQVPGFDEWLWVLAYPGIKVSTAEARAILPAQYRRQDCIAHGRHLAGF +IHACYSRQPELAAKLMKDVIAEPYRERLLPGFRQARQAVAEIGAVASGISGSGPTLFALCDKPETAQRVA +DWLGKNYLQNQEGFVHICRLDTAGARVLEN +>NP_414544_near_copy +MKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAETFSLNNLGRFADKLPSEPRENIVYQCWE +RFCQELGKQIPVAMTLEKNMPIGSGLGSSACSVVAALMAMNEHCGKPLNDTRLLALMGELEGRISGSIHY +DNVAPCFLGGMQLMIEENDIISQQVPGFDEWLWVLAYPGIKVSTAEARAILPAQYRRQDCIAHGRHLAGF +IHACYSRQPELAAKLMKDVIAEPYRERLLPGFRQARQAVAEIGAVASGISGSGPTLFALCDKPETAQRVA +DWLGKNYLQNQEGFVHICRLDTAGARVLEN +>gi|16127998|ref|NP_414545.1| threonine synthase [Escherichia coli str. K-12 substr. MG1655] +MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILSAFIGDEIPQE +ILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAA +VAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNS +ANSINISRLLAQICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVP +RFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTS +EPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAAL +RKLMMNHQ +>NP_414546_near_copy_1 +MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDHGWWKQHYEWRGNRWHL +HGPPPPPRHHKKAPHDHHGGHGPGKHHRV +>NP_414546_near_copy_2 +MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDHGWWKQHYEWRGNRWHL +HGPPPPPRHHKKAPHDHHGGHGPGKHHRRI +>gi|16128000|ref|NP_414547.1| peroxide resistance protein, lowers intracellular iron [Escherichia coli str. K-12 substr. MG1655] +MLILISPAKTLDYQSPLTTTRYTLPELLDNSQQLIHEARKLTPPQISTLMRISDKLAGINAARFHDWQPD +FTPANARQAILAFKGDVYTGLQAETFSEDDFDFAQQHLRMLSGLYGVLRPLDLMQPYRLEMGIRLENARG +KDLYQFWGDIITNKLNEALAAQGDNVVINLASDEYFKSVKPKKLNAEIIKPVFLDEKNGKFKIISFYAKK +ARGLMSRFIIENRLTKPEQLTGFNSEGYFFDEDSSSNGELVFKRYEQR +>gi|16128001|ref|NP_414548.1| putative transporter [Escherichia coli str. K-12 substr. MG1655] +MPDFFSFINSVLWGSVMIYLLFGAGCWFTFRTGFVQFRYIRQFGKSLKNSIHPQPGGLTSFQSLCTSLAA +RVGSGNLAGVALAITAGGPGAVFWMWVAAFIGMATSFAECSLAQLYKERDVNGQFRGGPAWYMARGLGMR +WMGVLFAVFLLIAYGIIFSGVQANAVARALSFSFDFPPLVTGIILAVFTLLAITRGLHGVARLMQGFVPL +MAIIWVLTSLVICVMNIGQLPHVIWSIFESAFGWQEAAGGAAGYTLSQAITNGFQRSMFSNEAGMGSTPN +AAAAAASWPPHPAAQGIVQMIGIFIDTLVICTASAMLILLAGNGTTYMPLEGIQLIQKAMRVLMGSWGAE +FVTLVVILFAFSSIVANYIYAENNLFFLRLNNPKAIWCLRICTFATVIGGTLLSLPLMWQLADIIMACMA +ITNLTAILLLSPVVHTIASDYLRQRKLGVRPVFDPLRYPDIGRQLSPDAWDDVSQE +>gi|16128002|ref|NP_414549.1| transaldolase B [Escherichia coli str. K-12 substr. MG1655] +MTDKLTSLRQYTTVVADTGDIAAMKLYQPQDATTNPSLILNAAQIPEYRKLIDDAVAWAKQQSNDRAQQI +VDATDKLAVNIGLEILKLVPGRISTEVDARLSYDTEASIAKAKRLIKLYNDAGISNDRILIKLASTWQGI +RAAEQLEKEGINCNLTLLFSFAQARACAEAGVFLISPFVGRILDWYKANTDKKEYAPAEDPGVVSVSEIY +QYYKEHGYETVVMGASFRNIGEILELAGCDRLTIAPALLKELAESEGAIERKLSYTGEVKARPARITESE +FLWQHNQDPMAVDKLAEGIRKFAIDQEKLEKMIGDLL +>gi|16128003|ref|NP_414550.1| molybdochelatase incorporating molybdenum into molybdopterin [Escherichia coli str. K-12 substr. MG1655] +MNTLRIGLVSISDRASSGVYQDKGIPALEEWLTSALTTPFELETRLIPDEQAIIEQTLCELVDEMSCHLV +LTTGGTGPARRDVTPDATLAVADREMPGFGEQMRQISLHFVPTAILSRQVGVIRKQALILNLPGQPKSIK +ETLEGVKDAEGNVVVHGIFASVPYCIQLLEGPYVETAPEVVAAFRPKSARRDVSE +>gi|16128004|ref|NP_414551.1| inner membrane protein, Grp1_Fun34_YaaH family [Escherichia coli str. K-12 substr. MG1655] +MGNTKLANPAPLGLMGFGMTTILLNLHNVGYFALDGIILAMGIFYGGIAQIFAGLLEYKKGNTFGLTAFT +SYGSFWLTLVAILLMPKLGLTDAPNAQFLGVYLGLWGVFTLFMFFGTLKGARVLQFVFFSLTVLFALLAI +GNIAGNAAIIHFAGWIGLICGASAIYLAMGEVLNEQFGRTVLPIGESH + |
b |
diff -r 000000000000 -r b828ca44a313 test-data/k12_ten_proteins.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/k12_ten_proteins.fasta Mon Aug 04 08:13:39 2014 -0400 |
[ |
@@ -0,0 +1,60 @@ +>gi|16127995|ref|NP_414542.1| thr operon leader peptide [Escherichia coli str. K-12 substr. MG1655] +MKRISTTITTTITITTGNGAG +>gi|16127996|ref|NP_414543.1| fused aspartokinase I and homoserine dehydrogenase I [Escherichia coli str. K-12 substr. MG1655] +MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERI +FAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEA +RGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYS +AAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPC +LIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLIT +QSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL +ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSW +LKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAV +ADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELM +KFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIE +IEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFK +VKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV +>gi|16127997|ref|NP_414544.1| homoserine kinase [Escherichia coli str. K-12 substr. MG1655] +MVKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAETFSLNNLGRFADKLPSEPRENIVYQCWE +RFCQELGKQIPVAMTLEKNMPIGSGLGSSACSVVAALMAMNEHCGKPLNDTRLLALMGELEGRISGSIHY +DNVAPCFLGGMQLMIEENDIISQQVPGFDEWLWVLAYPGIKVSTAEARAILPAQYRRQDCIAHGRHLAGF +IHACYSRQPELAAKLMKDVIAEPYRERLLPGFRQARQAVAEIGAVASGISGSGPTLFALCDKPETAQRVA +DWLGKNYLQNQEGFVHICRLDTAGARVLEN +>gi|16127998|ref|NP_414545.1| threonine synthase [Escherichia coli str. K-12 substr. MG1655] +MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILSAFIGDEIPQE +ILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAA +VAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNS +ANSINISRLLAQICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVP +RFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTS +EPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAAL +RKLMMNHQ +>gi|16127999|ref|NP_414546.1| hypothetical protein b0005 [Escherichia coli str. K-12 substr. MG1655] +MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDHGWWKQHYEWRGNRWHL +HGPPPPPRHHKKAPHDHHGGHGPGKHHR +>gi|16128000|ref|NP_414547.1| peroxide resistance protein, lowers intracellular iron [Escherichia coli str. K-12 substr. MG1655] +MLILISPAKTLDYQSPLTTTRYTLPELLDNSQQLIHEARKLTPPQISTLMRISDKLAGINAARFHDWQPD +FTPANARQAILAFKGDVYTGLQAETFSEDDFDFAQQHLRMLSGLYGVLRPLDLMQPYRLEMGIRLENARG +KDLYQFWGDIITNKLNEALAAQGDNVVINLASDEYFKSVKPKKLNAEIIKPVFLDEKNGKFKIISFYAKK +ARGLMSRFIIENRLTKPEQLTGFNSEGYFFDEDSSSNGELVFKRYEQR +>gi|16128001|ref|NP_414548.1| putative transporter [Escherichia coli str. K-12 substr. MG1655] +MPDFFSFINSVLWGSVMIYLLFGAGCWFTFRTGFVQFRYIRQFGKSLKNSIHPQPGGLTSFQSLCTSLAA +RVGSGNLAGVALAITAGGPGAVFWMWVAAFIGMATSFAECSLAQLYKERDVNGQFRGGPAWYMARGLGMR +WMGVLFAVFLLIAYGIIFSGVQANAVARALSFSFDFPPLVTGIILAVFTLLAITRGLHGVARLMQGFVPL +MAIIWVLTSLVICVMNIGQLPHVIWSIFESAFGWQEAAGGAAGYTLSQAITNGFQRSMFSNEAGMGSTPN +AAAAAASWPPHPAAQGIVQMIGIFIDTLVICTASAMLILLAGNGTTYMPLEGIQLIQKAMRVLMGSWGAE +FVTLVVILFAFSSIVANYIYAENNLFFLRLNNPKAIWCLRICTFATVIGGTLLSLPLMWQLADIIMACMA +ITNLTAILLLSPVVHTIASDYLRQRKLGVRPVFDPLRYPDIGRQLSPDAWDDVSQE +>gi|16128002|ref|NP_414549.1| transaldolase B [Escherichia coli str. K-12 substr. MG1655] +MTDKLTSLRQYTTVVADTGDIAAMKLYQPQDATTNPSLILNAAQIPEYRKLIDDAVAWAKQQSNDRAQQI +VDATDKLAVNIGLEILKLVPGRISTEVDARLSYDTEASIAKAKRLIKLYNDAGISNDRILIKLASTWQGI +RAAEQLEKEGINCNLTLLFSFAQARACAEAGVFLISPFVGRILDWYKANTDKKEYAPAEDPGVVSVSEIY +QYYKEHGYETVVMGASFRNIGEILELAGCDRLTIAPALLKELAESEGAIERKLSYTGEVKARPARITESE +FLWQHNQDPMAVDKLAEGIRKFAIDQEKLEKMIGDLL +>gi|16128003|ref|NP_414550.1| molybdochelatase incorporating molybdenum into molybdopterin [Escherichia coli str. K-12 substr. MG1655] +MNTLRIGLVSISDRASSGVYQDKGIPALEEWLTSALTTPFELETRLIPDEQAIIEQTLCELVDEMSCHLV +LTTGGTGPARRDVTPDATLAVADREMPGFGEQMRQISLHFVPTAILSRQVGVIRKQALILNLPGQPKSIK +ETLEGVKDAEGNVVVHGIFASVPYCIQLLEGPYVETAPEVVAAFRPKSARRDVSE +>gi|16128004|ref|NP_414551.1| inner membrane protein, Grp1_Fun34_YaaH family [Escherichia coli str. K-12 substr. MG1655] +MGNTKLANPAPLGLMGFGMTTILLNLHNVGYFALDGIILAMGIFYGGIAQIFAGLLEYKKGNTFGLTAFT +SYGSFWLTLVAILLMPKLGLTDAPNAQFLGVYLGLWGVFTLFMFFGTLKGARVLQFVFFSLTVLFALLAI +GNIAGNAAIIHFAGWIGLICGASAIYLAMGEVLNEQFGRTVLPIGESH + |
b |
diff -r 000000000000 -r b828ca44a313 test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular Mon Aug 04 08:13:39 2014 -0400 |
b |
@@ -0,0 +1,2 @@ +#A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore +ENA|BC112106|BC112106.1 gi|57163782|ref|NM_001009242.1| 1213 1047 86 100 1047 92.07 1514 |
b |
diff -r 000000000000 -r b828ca44a313 test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular Mon Aug 04 08:13:39 2014 -0400 |
b |
@@ -0,0 +1,2 @@ +#A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore +sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 348 348 100 100 348 96.55 701 |
b |
diff -r 000000000000 -r b828ca44a313 test-data/rbh_blastp_k12.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rbh_blastp_k12.tabular Mon Aug 04 08:13:39 2014 -0400 |
b |
@@ -0,0 +1,10 @@ +#A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore +gi|16127995|ref|NP_414542.1| gi|16127995|ref|NP_414542.1| 21 21 100 100 21 100.00 38.1 +gi|16127996|ref|NP_414543.1| gi|16127996|ref|NP_414543.1| 820 820 100 100 820 100.00 1687 +gi|16127997|ref|NP_414544.1| gi|16127997|ref|NP_414544.1| 310 310 100 100 310 100.00 642 +gi|16127998|ref|NP_414545.1| gi|16127998|ref|NP_414545.1| 428 428 100 100 428 100.00 882 +gi|16128000|ref|NP_414547.1| gi|16128000|ref|NP_414547.1| 258 258 100 100 258 100.00 531 +gi|16128001|ref|NP_414548.1| gi|16128001|ref|NP_414548.1| 476 476 100 100 476 100.00 959 +gi|16128002|ref|NP_414549.1| gi|16128002|ref|NP_414549.1| 317 317 100 100 317 100.00 648 +gi|16128003|ref|NP_414550.1| gi|16128003|ref|NP_414550.1| 195 195 100 100 195 100.00 397 +gi|16128004|ref|NP_414551.1| gi|16128004|ref|NP_414551.1| 188 188 100 100 188 100.00 365 |
b |
diff -r 000000000000 -r b828ca44a313 test-data/rbh_blastp_k12_self.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rbh_blastp_k12_self.tabular Mon Aug 04 08:13:39 2014 -0400 |
b |
@@ -0,0 +1,5 @@ +#A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore +gi|16127997|ref|NP_414544.1| NP_414544_near_copy 310 309 99 100 309 99.68 638 +NP_414544_near_copy gi|16127997|ref|NP_414544.1| 309 310 100 99 309 99.68 638 +NP_414546_near_copy_1 NP_414546_near_copy_2 99 100 99 98 98 100.00 197 +NP_414546_near_copy_2 NP_414546_near_copy_1 100 99 98 99 98 100.00 197 |
b |
diff -r 000000000000 -r b828ca44a313 test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular Mon Aug 04 08:13:39 2014 -0400 |
b |
@@ -0,0 +1,2 @@ +#A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1047 1213 100 86 1047 92.07 1474 |
b |
diff -r 000000000000 -r b828ca44a313 test-data/rbh_none.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rbh_none.tabular Mon Aug 04 08:13:39 2014 -0400 |
b |
@@ -0,0 +1,1 @@ +#A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore |
b |
diff -r 000000000000 -r b828ca44a313 test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular Mon Aug 04 08:13:39 2014 -0400 |
b |
@@ -0,0 +1,2 @@ +#A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1047 1213 66 57 230 97.39 559 |
b |
diff -r 000000000000 -r b828ca44a313 test-data/rhodopsin_nucs.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rhodopsin_nucs.fasta Mon Aug 04 08:13:39 2014 -0400 |
b |
b'@@ -0,0 +1,161 @@\n+>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA\n+ATGAACGGGACGGAGGGCCCGAACTTCTACGTGCCCTTCTCCAACAAAACGGGTGTGGTACGCAGCCCCT\n+TCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTCCTGCT\n+CATCGTGCTTGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACGGTCCAGCACAAGAAGCTGCGCACG\n+CCTCTCAACTACATCCTGCTCAACCTGGCCGTGGCTGACCTCTTCATGGTCTTCGGTGGCTTCACCACCA\n+CCCTCTACACCTCTCTGCATGGATACTTTGTCTTTGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGC\n+CACACTGGGCGGTGAAATTGCCCTGTGGTCTTTGGTGGTCCTGGCCATTGAGCGGTACGTGGTGGTGTGT\n+AAGCCCATGAGCAACTTCCGCTTTGGGGAGAACCATGCCATAATGGGCGTCGCTTTCACCTGGGTCATGG\n+CACTGGCCTGCGCTGCACCCCCCCTCGTTGGTTGGTCCAGGTACATCCCTGAAGGCATGCAGTGTTCATG\n+CGGGATCGACTACTACACACTCAAGCCAGAAGTCAACAACGAGTCCTTTGTCATCTACATGTTCGTGGTC\n+CACTTCACCATCCCCATGATCGTCATCTTCTTTTGCTACGGGCAGCTTGTCTTCACAGTCAAGGAGGCGG\n+CAGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCTGAGAAGGAGGTCACTCGCATGGTCATCATCAT\n+GGTCATTGCTTTCCTGATCTGTTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGG\n+TCCAACTTTGGCCCCATCTTCATGACACTCCCGGCGTTCTTCGCAAAGTCCTCCTCCATCTACAACCCTG\n+TCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCC\n+ACTGGGTGATGACGAGGCTTCCACAACCGGTTCCAAGACGGAGACCAGCCAGGTGGCACCGGCCTAA\n+\n+>gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds\n+TCTTTCTAGTTTGGGGGGGGGGACTTTAAAGAGCCGCCAATATGAACGGAACAGAAGGCCCAAACTTTTA\n+CATACCCATGTCCAACAAGACTGGGGTGGTGCGAAGCCCCTTTGAATACCCTCAGTATTACCTGGCAGAG\n+CCATGGCAATATTCCATTCTGTGCGCGTACATGTTCCTGCTCATTCTACTTGGGTTCCCAATCAACTTCA\n+TGACCTTGTACGTCACCATCCAGCACAAGAAGCTCCGGACACCCTTAAACTATATCCTGCTGAATTTGGC\n+CTTTGCCAACCACTTCATGGTCCTGTGTGGATTCACGGTGACAATGTACTCCTCAATGAACGGATACTTC\n+ATCCTCGGAGCCACCGGTTGCTATGTTGAAGGCTTCTTCGCTACCCTTGGTGGTGAAATCGCCCTTTGGT\n+CCCTGGTGGTCTTGGCCATTGAACGATACGTGGTCGTCTGTAAGCCCATGAGCAACTTCCGATTTAGTGA\n+GAACCATGCCGTCATGGGCGTAGCGTTCACCTGGATAATGGCTTTGTCCTGTGCTGTTCCTCCACTCCTT\n+GGATGGTCCAGGTACATCCCCGAGGGCATGCAGTGCTCCTGCGGAGTCGACTACTACACCCTGAAGCCCG\n+AGGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTCGTCCACTTCACCATCCCCCTGATTATCATTTT\n+CTTCTGCTATGGCCGCCTGGTGTGCACTGTGAAAGAGGCTGCAGCTCAACAGCAAGAGTCCGCCACCACC\n+CAGAAGGCCGAGAAAGAGGTGACCAGGATGGTGATCATCATGGTGGTCTTCTTCCTTATCTGTTGGGTCC\n+CCTACGCCTCTGTCGCTTTCTTCATCTTCAGCAATCAGGGCTCTGAGTTCGGCCCCATCTTCATGACCGT\n+CCCAGCTTTCTTTGCCAAGAGTTCTTCCATCTACAACCCCGTCATCTACATCATGCTCAACAAGCAGTTC\n+CGTAACTGCATGATCACCACCCTGTGCTGCGGCAAGAATCCCTTTGGAGAAGACGATGCCTCCTCTGCCG\n+CCACCTCCAAGACAGAGGCTTCTTCTGTTTCTTCCAGCCAGGTGTCTCCTGCATAAGACCTTCCACCAGG\n+CCTGTCTCAGGGTCCGCTGCCTCACACAGCTCCCACCGCCCCAACTCCGTCTCCTGCTCGCTAAGGCGGC\n+GAAGTTCCCCTTCCATTACATAAAACGTATCTGTTCAAGAAAGGCGACGACGAAGGAGAAGAAGAGGAGC\n+CCCCCCGAACCCCTTCGCTGCTGCTGAAAACGACTTGATTGCTTCTGCAACGCAACGGGGCCTTACGGCA\n+GCGAAGGGGTTGTCATCCGGACGCGCCAAGAATTCCTTCGAGACTGTAAATATCTTAAAGGAACCGTCCT\n+GCTAGTTACCGACGCCGCTCCTGTAGCCGCCGTTCCCCCGCACTCCGGCCGGTTCATACCTCTTATTTTT\n+TTGCAATGCAACAGAAAATAATATTTTTGTTCCCACGGCTTTTCCCGGTCAGGTCTGGTAGTGGCGGAGA\n+TTGGCCGACCCCTCGCACCTGTAATAAAGCGCAG\n+\n+>gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds\n+GTGCCCTTCTCCAACAAGACAGGCGTGGTGCGCAGTCCCTTCGAGCATCCACAGTACTACCTGGCCGAGC\n+CATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTCGGCTTCCCCATCAACTTCCT\n+CACGCTCTATGTCACGGTTCAGCACAAGAAGCTGCGTACGCCTCTCAACTACATCCTGCTCAACCTGGCC\n+GTGGCCGACCTCTTCATGGTCTTCGGAGGCTTCACCACCACCCTCTACACCTCCCTGCATGGATACTTTG\n+TCTTCGGGCCTACGGGATGCAATCTGGAGGGCTTTTTTGCCACCCTGGGAGGTATGAGCTGAGATGCGGG\n+TAAGGAGGAGGCATAGAGGCATCTGGGAACAGTCCCAAGCTTGGGGTGAAGGCTAAGAGGCCTTCTTCCT\n+TGTTCTGTCATTGGCGTCGTCCGAAGCCCTCACTTAATCAACAAACAGTTTGGTGGTGAGGCGCTGAGCT\n+CCATTTGGAGAGGGCAGGTATCGAGCACTGTTTTATCCCCCCTGGAGTGGTGCCATTGCCTTGCTTTACA\n+GCAAAGAAACTGAGGATGAGAGGAGTCGAGGGTCTTGCCAGGTCACATCATGGCAGAGACAGAGCTGAGT\n+TTCAACCCTGCATCTATGTGCAGTTTCCCTTGGAGCAGCTATGTTAGGTCAGACCCACGGTGGGCACTGG\n+GGAGAGAGCTGCACAAGACAGGTCCCTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTCCTGATTGCCA\n+GGAGTGATGTGCAGCGCAAATGTCTGAATTCCATTATTATGTGCTCCTTCTTCCTCTGAGCCAAACATCC\n+ATCTTCATGGCTCCTAGAATTGGGTCCCACCCACATGAGCAGGTCATTTTGTTTCCCTAGAGGGGAGAGG\n+TCACT'..b'CTTCAGAGGGTCAGATTTGGGATGAGAGTGGAGGCTGCGAGGGCCTGAGTG\n+GGAAGGGATTGGAGGCAAATCTCACCAACCATGTCAGTTTGCTACACACACTTTGGGTGGACCCTGACCC\n+TGACTCATGCTTCTTGCCTTCCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCCAC\n+TGGGTGACGATGAGGCCTCCACCACTGCCTC\n+\n+>gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds\n+GTGCCCTTCTCCAACAAGACGGGTGTGGTGCGCAGCCCCTTCGAGTACCCGCAGTACTACCTGGCTGAGC\n+CCTGGCAGTTCTCCATGCTGGCTGCCTACATGTTTCTGCTGATCGTGCTCGGATTCCCCATCAACTTCCT\n+CACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTGGCT\n+GTGGCCAACCTCTTCATGGTCTTTGGAGGCTTCACCACCACCCTGTATACCTCTATGCATGGATACTTCG\n+TCTTCGGGGCCACGGGATGCAATCTGGAGGGCTTCTTTGCCACGCTGGGCGGTGAAATCGCCCTGTGGTC\n+CCTGGTGGTCCTGGCCATCGAGCGGTATGTGGTGGTCTGCAAGCCCATGAGCAACTTCCGCTTTGGGGAG\n+AACCACGCCATCATGGGCCTCGCCTTCACGTGGGTCATGGCACTGGCCTGCGCTGCACCCCCACTAGCCG\n+GCTGGTCCAGGTACATCCCAGAGGGCATGCAGTGCTCGTGTGGGATTGACTACTACACGCTCAAACCGGA\n+GGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTGTCATTTTC\n+TTCTGCTACGGACAGCTGGTGTTCACAGTGAAGGAGGCGGCTGCCCAGCAGCAGGAGTCAGCCACCACCC\n+AGAAGGCCGAGAAGGAAGTCACGCGCATGGTCATCATCATGGTCGTTGCGTTCCTAATCTGTTGGCTGCC\n+CTACGCCAGCGTGGCATTCTACATCTTTACCCACCAGGGCTCTAACTTTGGCCCTGTCTTCATGACCATC\n+CCGGCATTCTTCGCCAAGTCATCCTCCATCTACAACCCGGTCATCTATATCATGATGAACAAGCAGTTCC\n+GGAACTGCATGCTCACCACCCTCTGCTGTGGCAAGAACCCACTGGGTGATGACGAAGCATCCACCACTGC\n+CTC\n+\n+>gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds\n+ATGAACGGGACCGAGGGCCCAAACTTCTACGTGCCTTTCTCCAACAAGACGGGCGTCGTACGCAGCCCCT\n+TCGAGGCGCCGCAGTACTACCTGGCTGAGCCATGGCAGTTCAGCATGCTGGCCGCCTACATGTTCCTGCT\n+GATCATGCTTGGCTTCCCCATCAACTTCCTCACGCTGTACGTCACAGTCCAGCACAAGAAGCTGAGGACC\n+CCCCTCAACTACATCCTGCTCAACCTGGCCGTGGCAGATCTCTTCATGGTGTTCGGGGGCTTCACCACCA\n+CCCTGTATACCTCTCTGCACGGGTACTTCGTGTTCGGTCCGACGGGCTGCAACCTCGAGGGCTTCTTTGC\n+CACCTTAGGCGGTGAAATTGCACTGTGGTCCTTGGTGGTGCTAGCCATCGAGCGGTACGTAGTGGTGTGC\n+AAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCGTCGCATTCACCTGGGTCATGG\n+CTCTGGCCTGTGCGGCCCCCCCCCTCGTCGGCTGGTCTAGATACATCCCGGAGGGGATGCAGTGCTCGTG\n+CGGGATCGATTACTACACGCCCCACGAGGAGACCAACAATGAGTCGTTCGTCATCTACATGTTCGTTGTA\n+CACTTCATCATCCCCCTGATTGTCATATTCTTCTGCTACGGGCAGCTGGTCTTCACCGTCAAGGAGGCTG\n+CAGCCCAGCAGCAGGAGTCGGCCACCACTCAGAAGGCCGAGAAGGAGGTCACGCGTATGGTCATCATCAT\n+GGTCATCGCTTTCCTCATATGCTGGCTGCCCTACGCAGGTGTGGCGTTCTACATCTTCACCCATCAGGGA\n+TCCGACTTTGGCCCCATCTTCATGACCATCCCGGCTTTCTTTGCCAAGACGTCTGCCGTCTATAACCCCG\n+TCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGGTCACCACTCTCTGCTGTGGCAAGAACCC\n+CCTAGGTGACGACGAGGCCTCCACGACCGTGTCCAAGACAGAGACCAGCCAAGTGGCCCCTGCCTAA\n+\n+>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds\n+CCGCTACTGACGAACCGCAACCATGAACGGCACTGAGGGACCTAACTTCTACATCCCCATGTCAAACGCC\n+ACTGGTGTAGTGAGGAGTCCATTTGAATACCCGCAGTACTACCTTGCAGAACCATGGGCTTTCTCAGCTC\n+TGTCTGCCTACATGTTCTTCCTGATTATCGCCGGATTCCCCATCAACTTCCTCACCCTGTATGTCACCAT\n+CGAACATAAGAAACTGAGGACCCCACTGAACTACATTCTGCTGAACCTGGCCGTGGCCGACCTCTTCATG\n+GTGTTTGGCGGATTCACCACCACGATGTACACCTCCATGCACGGCTACTTTGTCTTCGGCCCCACCGGCT\n+GCAACATCGAAGGGTTCTTCGCCACCCTCGGCGGCGAGATTGCCCTCTGGTGCCTCGTTGTCCTGGCCAT\n+TGAAAGGTGGATGGTCGTCTGCAAGCCAGTGACCAATTTCCGCTTCGGTGAGAGCCATGCCATCATGGGT\n+GTCATGGTGACCTGGACCATGGCATTGGCCTGTGCCCTCCCCCCTCTCTTCGGCTGGTCTCGGTACATTC\n+CGGAAGGTCTGCAGTGCTCGTGCGGGATCGACTACTATACCCGGGCGCCTGGGATCAACAATGAGTCCTT\n+TGTGATCTACATGTTTACCTGCCACTTCTCCATCCCACTCGCCGTCATCTCTTTCTGCTACGGCCGACTG\n+GTGTGCACCGTCAAAGAGGCCGCTGCCCAGCAACAGGAGTCCGAGACCACCCAGAGGGCTGAGCGGGAGG\n+TCACCCGCATGGTCGTCATCATGGTCATCTCCTTCCTGGTCTGCTGGGTGCCCTATGCCAGTGTGGCCTG\n+GTACATCTTTACCCACCAGGGAAGCACTTTTGGGCCCATCTTCATGACCATTCCATCCTTCTTTGCCAAG\n+AGTTCAGCCCTCTACAACCCCATGATCTACATCTGCATGAACAAGCAGTTCCGCCATTGCATGATCACCA\n+CCCTCTGCTGTGGGAAGAACCCCTTCGAGGAGGAGGATGGAGCGTCCGCCACTAGCTCTAAAACTGAGGC\n+TTCATCCGTGTCCTCCAGCTCTGTCTCCCCGGCATAAACCTTGTTTGACCGAACACCACGCATCAACACA\n+AAGACCAAGAATGCTGACTAAATGCTAACATTTCAGGGAAATCCAAAGACTTTTTACTATTTTTTTACAC\n+AACCATATAGGTTGCAAACAGAGGTTTAGCCCTGTTTACAGGTTGTCATCAATGTGATGTCAGTATGTAC\n+AATATAGTCAACTTGATAGCAAGTTGTTGGCTTATTTCAGATTGTATGGGCAATGTAATCAACCATATGT\n+GAAATAAATTGCAA\n' |
b |
diff -r 000000000000 -r b828ca44a313 test-data/rhodopsin_proteins.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rhodopsin_proteins.fasta Mon Aug 04 08:13:39 2014 -0400 |
[ |
@@ -0,0 +1,43 @@ +>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] +MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRT +PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC +KPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVV +HFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQG +SNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA + +>gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin +MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRT +PLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVC +KPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVV +HFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQG +SEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTEASSVSSSQ +VSPA + +>gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis] +VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA +VADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGE +NHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF +FCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTL +PAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS + +>gi|283855823|gb|ADB45229.1| rhodopsin [Myotis pilosus] +VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA +VANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGE +NHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF +FCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTI +PAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS + +>gi|223523|prf||0811197A rhodopsin [Bos taurus] +MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRT +PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC +KPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYTPHEETNNESFVIYMFVVH +FIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGS +DFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA + +>gi|12583665|dbj|BAB21486.1| fresh water form rod opsin [Conger myriaster] +MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRT +PLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVC +KPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTC +HFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQG +STFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSS +VSPA |
b |
diff -r 000000000000 -r b828ca44a313 test-data/three_human_mRNA.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/three_human_mRNA.fasta Mon Aug 04 08:13:39 2014 -0400 |
b |
b'@@ -0,0 +1,183 @@\n+>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.\n+GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAG\n+CCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCAC\n+GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC\n+CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA\n+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC\n+TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT\n+TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA\n+CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA\n+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA\n+CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC\n+TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA\n+TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT\n+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG\n+GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA\n+TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA\n+ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG\n+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA\n+CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC\n+AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT\n+CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA\n+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC\n+CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA\n+TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG\n+CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC\n+TATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCC\n+CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTAAAAAAAAAAAAACCTGCTA\n+GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT\n+ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA\n+GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT\n+CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT\n+GGGAGGGGAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA\n+TTGCTTATGAAAATTCCATAGTGGTATTTTTTTGGATTCTTAATGTGTAACTTAAACATA\n+CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA\n+AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG\n+GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG\n+AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT\n+TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT\n+TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACTT\n+TTTTTCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG\n+CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA\n+CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT\n+TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC\n+TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT\n+GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT\n+AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT\n+TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT\n+GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT\n+TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA\n+GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA\n+TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA\n+TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT\n+AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT\n+TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT\n+ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT\n+GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA\n+GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT\n+GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT\n+TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT\n+TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT\n+AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG\n+AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC\n+TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGTTGTT\n+CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGT'..b'AAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT\n+CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG\n+ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT\n+TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC\n+CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG\n+CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG\n+CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC\n+TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC\n+AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT\n+GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG\n+AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG\n+GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT\n+GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC\n+TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT\n+GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT\n+ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT\n+TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC\n+GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC\n+GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG\n+GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG\n+GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC\n+AAGGGCCAGCCCACGCTGGTGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC\n+CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA\n+GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG\n+TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA\n+ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC\n+AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC\n+ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA\n+CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT\n+CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA\n+TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAGGACGACCTG\n+CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT\n+GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC\n+TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC\n+TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG\n+ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT\n+CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT\n+ACCATGTCCAGTGGAGTTCAGAGATCGTTCCTATACATTTCTGTTCATCTTAAGGTGGAC\n+TCGTTTGGTTACCAATTTAACTAGTCCTGCAGAGGATTTAACTGTGAACCTGGAGGGCAA\n+GGGGTTTCCACAGTTGCTGCTCCTTTGGGGCAACGACGGTTTCAAACCAGGATTTTGTGT\n+TTTTTCGTTCCCCCCACCCGCCCCCAGCAGATGGAAAGAAAGCACCTGTTTTTACAAATT\n+CTTTTTTTTTTTTTTTTTTTTTTTTTTTTGCTGGTGTCTGAGCTTCAGTATAAAAGACAA\n+AACTTCCTGTTTGTGGAACAAAATTTCGAAAGAAAAAACCAAA\n+>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.\n+CCAGCTGGAGCCCTGAGTGGCTGAGCTCAGGCCTTCGCAGCATTCTTGGGTGGGAGCAGC\n+CACGGGTCAGCCACAAGGGCCACAGCCATGAATGGCACAGAAGGCCCTAACTTCTACGTG\n+CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG\n+GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC\n+TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT\n+CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC\n+ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT\n+TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG\n+GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC\n+CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA\n+CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC\n+TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC\n+TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG\n+GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC\n+CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG\n+GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA\n+GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG\n+CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT\n+GAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG\n+CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA\n+GCCATCCCACCAG\n' |
b |
diff -r 000000000000 -r b828ca44a313 tools/blast_rbh/README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blast_rbh/README.rst Mon Aug 04 08:13:39 2014 -0400 |
b |
@@ -0,0 +1,111 @@ +Galaxy tool to find BLAST Reciprocal Best Hits (RBH) +==================================================== + +This tool is copyright 2011-2014 by Peter Cock, The James Hutton Institute +(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. +See the licence text below. + +This tool is a short Python script to run reciprocal BLAST searches on a +pair of sequence files, and extract the reciprocal best hits. + +This is a work in progress, and builds on an earlier implementation which +prequired the two BLAST searches be prepared in advance. Integration allows +a much simpler user experience, and can ensure sensible filters are used. + + +Automated Installation +====================== + +Installation via the Galaxy Tool Shed should take care of the Galaxy side of +things, including the dependency the NCBI BLAST+ binaries. + + +Manual Installation +=================== + +There are just two files to install: + +- ``blast_rbh.py`` (the Python script) +- ``blast_rbh.xml`` (the Galaxy tool definition) + +The suggested location is in a ``tools/blast_rbh/`` folder. You will then +need to modify the ``tools_conf.xml`` file to tell Galaxy to offer the tool +by adding the line:: + + <tool file="blast_rbh/blast_rbh.xml" /> + +If you want to run the functional tests, include the same line in your +``tool_conf.xml.sample`` file, and the sample test files under Galaxy's +``test-data/`` directory. Then:: + + ./run_functional_tests.sh -id blast_reciprocal_best_hits + +You will need to have the NCBI BLAST+ binaries installed and on the ``$PATH``. + + +History +======= + +======= ====================================================================== +Version Changes +------- ---------------------------------------------------------------------- +v0.1.0 - Initial Test Tool Shed release, targetting NCBI BLAST+ 2.2.29 +v0.1.1 - Supports self-comparison, sometimes useful for spotting duplicates. +v0.1.2 - Using optparse for command line API. + - Fixed Tool Shed dependency definition. +======= ====================================================================== + + +Developers +========== + +This tool is developed on the following GitHub repository: +https://github.com/peterjc/galaxy_blast/tree/master/tools/blast_rbh + +For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use +the following command from the Galaxy root folder:: + + $ tar -czf blast_rbh.tar.gz tools/blast_rbh/README.rst tools/blast_rbh/blast_rbh.xml tools/blast_rbh/blast_rbh.py tools/blast_rbh/tool_dependencies.xml test-data/rhodopsin_nucs.fasta test-data/rhodopsin_proteins.fasta test-data/three_human_mRNA.fasta test-data/four_human_proteins.fasta test-data/k12_edited_proteins.fasta test-data/k12_ten_proteins.fasta test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular test-data/rbh_none.tabular test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular test-data/rbh_blastp_k12.tabular test-data/rbh_blastp_k12_self.tabular + +Check this worked:: + + $ tar -tzf blast_rbh.tar.gz + tools/blast_rbh/README.rst + tools/blast_rbh/blast_rbh.xml + tools/blast_rbh/blast_rbh.py + tools/blast_rbh/tool_dependencies.xml + test-data/rhodopsin_nucs.fasta + test-data/rhodopsin_proteins.fasta + test-data/three_human_mRNA.fasta + test-data/four_human_proteins.fasta + test-data/k12_edited_proteins.fasta + test-data/k12_ten_proteins.fasta + test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular + test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular + test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular + test-data/rbh_none.tabular + test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular + test-data/rbh_blastp_k12.tabular + test-data/rbh_blastp_k12_self.tabular + + +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. |
b |
diff -r 000000000000 -r b828ca44a313 tools/blast_rbh/blast_rbh.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blast_rbh/blast_rbh.py Mon Aug 04 08:13:39 2014 -0400 |
[ |
b'@@ -0,0 +1,254 @@\n+#!/usr/bin/env python\n+"""BLAST Reciprocal Best Hit (RBH) from two FASTA input files.\n+\n+Takes the following command line options,\n+1. FASTA filename of species A\n+2. FASTA filename of species B\n+3. Sequence type (prot/nucl)\n+4. BLAST type (e.g. blastn, or blastp) consistent with sequence type\n+5. Minimum BLAST Percentage identity\n+6. Minimum BLAST query coverage\n+7. Output filename\n+"""\n+\n+# TODO - Output more columns, e.g. pident, qcovs, descriptions?\n+\n+import os\n+import sys\n+import tempfile\n+import shutil\n+from optparse import OptionParser\n+\n+def stop_err( msg ):\n+ sys.stderr.write("%s\\n" % msg)\n+ sys.exit(1)\n+\n+def run(cmd):\n+ return_code = os.system(cmd)\n+ if return_code:\n+ stop_err("Error %i from: %s" % (return_code, cmd))\n+\n+if "--version" in sys.argv[1:]:\n+ #TODO - Capture version of BLAST+ binaries too?\n+ print "BLAST RBH v0.1.2"\n+ sys.exit(0)\n+\n+#Parse Command Line\n+usage = """Use as follows:\n+\n+$ python blast_rbh.py [options] A.fasta B.fasta\n+"""\n+\n+parser = OptionParser(usage=usage)\n+parser.add_option("-a", "--alphabet", dest="dbtype",\n+ default=None,\n+ help="Alphabet type (nucl or prot)")\n+parser.add_option("-t", "--task", dest="task",\n+ default=None,\n+ help="BLAST task (e.g. blastp, blastn, megablast)")\n+parser.add_option("-i","--identity", dest="min_identity",\n+ default="0",\n+ help="Minimum percentage identity (optional, default 0)")\n+parser.add_option("-c", "--coverage", dest="min_coverage",\n+ default="0",\n+ help="Minimum HSP coverage (optional, default 0)")\n+parser.add_option("-o", "--output", dest="output",\n+ default=None, metavar="FILE",\n+ help="Output filename")\n+options, args = parser.parse_args()\n+\n+if len(args) != 2:\n+ stop_err("Expects two input FASTA filenames")\n+fasta_a, fasta_b = args\n+if not os.path.isfile(fasta_a):\n+ stop_err("Missing input file for species A: %r" % fasta_a)\n+if not os.path.isfile(fasta_b):\n+ stop_err("Missing input file for species B: %r" % fasta_b)\n+if os.path.abspath(fasta_a) == os.path.abspath(fasta_b):\n+ self_comparison = True\n+ print("Doing self comparison; ignoring self matches.")\n+else:\n+ self_comparison = False\n+\n+if not options.output:\n+ stop_err("Output filename required, e.g. -o example.tab")\n+out_file = options.output\n+\n+try:\n+ min_identity = float(options.min_identity)\n+except ValueError:\n+ stop_err("Expected number between 0 and 100 for minimum identity, not %r" % min_identity)\n+if not (0 <= min_identity <= 100):\n+ stop_err("Expected minimum identity between 0 and 100, not %0.2f" % min_identity)\n+try:\n+ min_coverage = float(options.min_coverage)\n+except ValueError:\n+ stop_err("Expected number between 0 and 100 for minimum coverage, not %r" % min_coverage)\n+if not (0 <= min_coverage <= 100):\n+ stop_err("Expected minimum coverage between 0 and 100, not %0.2f" % min_coverage)\n+\n+if not options.task:\n+ stop_err("Missing BLAST task, e.g. -t blastp")\n+blast_type = options.task\n+\n+if not options.dbtype:\n+ stop_err("Missing database type, -a nucl, or -a prot")\n+dbtype = options.dbtype\n+if dbtype == "nucl":\n+ if blast_type in ["megablast", "blastn", "blastn-short", "dc-megablast"]:\n+ blast_cmd = "blastn -task %s" % blast_type\n+ elif blast_type == "tblastx":\n+ blast_cmd = "tblastx"\n+ else:\n+ stop_err("Invalid BLAST type for BLASTN: %r" % blast_type)\n+elif dbtype == "prot":\n+ if blast_type not in ["blastp", "blastp-short"]:\n+ stop_err("Invalid BLAST type for BLASTP: %r" % blast_type)\n+ blast_cmd = "blastp -task %s" % blast_type\n+else:\n+ stop_err("Expected \'nucl\' or \'prot\' for BLAST database type, not %r" % blast_type)\n+\n+try:\n+ threads = int(os.environ.get("GALAXY_SLOTS", "1"))\n+except:\n+ threads = 1\n+assert 1 <= threads, threads\n+\n+makeblastdb_exe = "makeblastdb"\n+\n'..b'+ continue\n+ score = float(parts[c_score])\n+ qlen = int(parts[c_qlen])\n+ length = int(parts[c_length])\n+ #print("Considering hit for %s to %s with score %s..." % (a, b, score))\n+ if current is None:\n+ #First hit\n+ assert best is None\n+ assert best_score is None\n+ best = dict()\n+ #Now append this hit...\n+ elif a != current:\n+ #New hit\n+ if len(best) == 1:\n+ #Unambiguous (no tied matches)\n+ yield current, list(best.values())[0]\n+ else:\n+ #print("%s has %i equally good hits: %s" % (a, len(best), ", ".join(best)))\n+ tie_warning += 1\n+ best = dict()\n+ #Now append this hit...\n+ elif score < best_score:\n+ #print("No improvement for %s, %s < %s" % (a, score, best_score))\n+ continue\n+ elif score > best_score:\n+ #This is better, discard old best\n+ best = dict()\n+ #Now append this hit...\n+ else:\n+ #print("Tied best hits for %s" % a)\n+ assert best_score == score\n+ #Now append this hit...\n+ current = a\n+ best_score = score\n+ #This will collapse two equally good hits to the same target (e.g. duplicated domain)\n+ best[b] = (b, score, parts[c_score], parts[c_identity], parts[c_coverage], qlen, length)\n+ #Best hit for final query, if unambiguous:\n+ if current is not None:\n+ if len(best)==1:\n+ yield current, list(best.values())[0]\n+ else:\n+ #print("%s has %i equally good hits: %s" % (a, len(best), ", ".join(best)))\n+ tie_warning += 1\n+\n+\n+#print("Starting...")\n+#TODO - Report log in case of error?\n+run(\'%s -dbtype %s -in "%s" -out "%s" -logfile "%s"\' % (makeblastdb_exe, dbtype, fasta_a, db_a, log))\n+run(\'%s -dbtype %s -in "%s" -out "%s" -logfile "%s"\' % (makeblastdb_exe, dbtype, fasta_b, db_b, log))\n+#print("BLAST databases prepared.")\n+run(\'%s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i\'\n+ % (blast_cmd, fasta_a, db_b, a_vs_b, cols, threads))\n+#print("BLAST species A vs species B done.")\n+run(\'%s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i\'\n+ % (blast_cmd, fasta_b, db_a, b_vs_a, cols, threads))\n+#print("BLAST species B vs species A done.")\n+\n+\n+best_b_vs_a = dict(best_hits(b_vs_a, self_comparison))\n+\n+\n+count = 0\n+outfile = open(out_file, \'w\')\n+outfile.write("#A_id\\tB_id\\tA_length\\tB_length\\tA_qcovhsp\\tB_qcovhsp\\tlength\\tpident\\tbitscore\\n")\n+for a, (b, a_score_float, a_score_str, a_identity_str, a_coverage_str, a_qlen, a_length) in best_hits(a_vs_b, self_comparison):\n+ if b not in best_b_vs_a:\n+ #Match b has no best hit\n+ continue\n+ a2, b_score_float, b_score_str, b_identity_str, b_coverage_str, b_qlen, b_length = best_b_vs_a[b]\n+ if a != a2:\n+ #Not an RBH\n+ continue\n+ #Start with IDs, lengths, coverage\n+ values = [a, b, a_qlen, b_qlen, a_coverage_str, b_coverage_str]\n+ #Alignment length was an integer so don\'t care about original string\n+ values.append(min(a_length, b_length))\n+ #Output the original string versions of the scores\n+ if float(a_identity_str) < float(b_identity_str):\n+ values.append(a_identity_str)\n+ else:\n+ values.append(b_identity_str)\n+ if a_score_float < b_score_float:\n+ values.append(a_score_str)\n+ else:\n+ values.append(b_score_str)\n+ outfile.write("%s\\t%s\\t%i\\t%i\\t%s\\t%s\\t%i\\t%s\\t%s\\n" % tuple(values))\n+ count += 1\n+outfile.close()\n+print "Done, %i RBH found" % count\n+if tie_warning:\n+ sys.stderr.write("Warning: Sequencies with tied best hits found, you may have duplicates/clusters\\n")\n+\n+#Remove temp files...\n+shutil.rmtree(base_path)\n' |
b |
diff -r 000000000000 -r b828ca44a313 tools/blast_rbh/blast_rbh.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blast_rbh/blast_rbh.xml Mon Aug 04 08:13:39 2014 -0400 |
b |
b'@@ -0,0 +1,239 @@\n+<tool id="blast_reciprocal_best_hits" name="BLAST Reciprocal Best Hits (RBH)" version="0.1.2">\n+ <description>from two FASTA files</description>\n+ <requirements>\n+ <requirement type="binary">makeblastdb</requirement>\n+ <requirement type="binary">blastp</requirement>\n+ <requirement type="binary">blastn</requirement>\n+ <requirement type="package" version="2.2.29">blast+</requirement>\n+ </requirements>\n+ <version_command interpreter="python">\n+blast_rbh.py --version\n+ </version_command>\n+ <command interpreter="python">\n+blast_rbh.py "$fasta_a" "$fasta_b"\n+-a $seq.dbtype\n+#if $seq.dbtype=="nucl"\n+-t $seq.nucl_type\n+#else\n+-t $seq.prot_type\n+#end if\n+-i $identity\n+-c $q_cover\n+-o "$output"\n+ </command>\n+ <stdio>\n+ <!-- Anything other than zero is an error -->\n+ <exit_code range="1:" />\n+ <exit_code range=":-1" />\n+ </stdio>\n+ <inputs>\n+ <!-- Galaxy does not have sub-types for protein vs nucletide FASTA -->\n+ <param name="fasta_a" type="data" format="fasta"\n+\t label="Genes/proteins from species A"\n+\t description="FASTA file, one sequence per gene/protein." /> \n+ <param name="fasta_b" type="data" format="fasta"\n+\t label="Genes/proteins from species B"\n+\t description="FASTA file, one sequence per gene/protein." /> \n+ <conditional name="seq">\n+ <param name="dbtype" type="select" label="Molecule type of FASTA inputs">\n+ <option value="prot">protein</option>\n+ <option value="nucl">nucleotide</option>\n+ </param>\n+ <when value="prot">\n+ <param name="prot_type" type="select" display="radio" label="Type of BLAST">\n+ <option value="blastp">blastp - Traditional BLASTP to compare a protein query to a protein database</option>\n+ <option value="blastp-short">blastp-short - BLASTP optimized for queries shorter than 30 residues</option>\n+ </param>\n+ </when>\n+ <when value="nucl">\n+ <param name="nucl_type" type="select" display="radio" label="Type of BLAST">\n+ <option value="megablast">megablast - Traditional megablast used to find very similar (e.g., intraspecies or closely related species) sequences</option>\n+ <option value="blastn">blastn - Traditional BLASTN requiring an exact match of 11, for somewhat similar sequences</option>\n+ <option value="blastn-short">blastn-short - BLASTN program optimized for sequences shorter than 50 bases</option>\n+ <option value="dc-megablast">dc-megablast - Discontiguous megablast used to find more distant (e.g., interspecies) sequences</option>\n+ <option value="tblastx">tblastx - TBLASTX program using translated query against translated database (protein level matches)</option>\n+ </param>\n+ </when>\n+ </conditional>\n+\t<param name="identity" type="float" value="70" min="0" max="100"\n+\t label="Minimum percentage identity for BLAST matches"\n+\t help="Default is 70%, use 0 for no filtering." />\n+ <param name="q_cover" type="float" value="50" min="0" max="100"\n+\t label="Minimum percentage query coverage for BLAST matches"\n+\t help="Default is 50%, use 0 for no filtering." />\n+ </inputs>\n+ <outputs>\n+ <data name="output" format="tabular" label="BLAST RBH: $fasta_a.name vs $fasta_b.name" />\n+ </outputs>\n+ <requirements>\n+ </requirements>\n+ <tests>\n+ <test>\n+ <param name="fasta_a" value="four_human_proteins.fasta" ftype="fasta"/>\n+ <param name="fasta_b" value="rhodopsin_proteins.fasta" ftype="fasta"/>\n+ <param name="dbtype" value="prot"/>\n+ <param name="nucl_type" value="blastp"/>\n+ <param name="identity" value="0.0"/>\n+ <param name="q_cover" va'..b'am name="q_cover" value="0.0"/>\n+ <output name="output" file="rbh_blastp_k12.tabular" ftype="tabular"/>\n+ </test>\n+ <test>\n+ <param name="fasta_a" value="k12_edited_proteins.fasta" ftype="fasta"/>\n+ <param name="fasta_b" value="k12_ten_proteins.fasta" ftype="fasta"/>\n+ <param name="dbtype" value="prot"/>\n+ <param name="nucl_type" value="blastp"/>\n+ <param name="identity" value="0.0"/>\n+ <param name="q_cover" value="0.0"/>\n+ <output name="output" file="rbh_blastp_k12.tabular" ftype="tabular"/>\n+ </test>\n+ <!-- this tests self-comparison -->\n+ <test>\n+ <param name="fasta_a" value="k12_edited_proteins.fasta" ftype="fasta"/>\n+ <param name="fasta_b" value="k12_edited_proteins.fasta" ftype="fasta"/>\n+ <param name="dbtype" value="prot"/>\n+ <param name="nucl_type" value="blastp"/>\n+ <param name="identity" value="80.0"/>\n+ <param name="q_cover" value="80.0"/>\n+ <output name="output" file="rbh_blastp_k12_self.tabular" ftype="tabular"/>\n+ </test>\n+ </tests>\n+ <help>\n+**What it does**\n+\n+Takes two FASTA files (*species A* and *species B*), builds a BLAST database\n+for each, runs reciprocal BLAST searchs (*A vs B*, and *B vs A*), optionally\n+filters the HSPs, and then compiles a list of the reciprocal best hits (RBH).\n+\n+The output from this tool is a tabular file containing multiple columns, with\n+information about the BLAST matches used:\n+\n+====== ==================================\n+Column Description\n+------ ----------------------------------\n+ 1 ID from *species A*\n+ 2 ID from *species B*\n+ 3 Length of sequence *A*\n+ 4 Length of sequence *B*\n+ 5 Percentage of sequence *A* covered\n+ 6 Percentage of sequence *B* covered\n+ 7 HSP alignment length\n+ 8 HSP percentage identity\n+ 9 HSP bitscore\n+====== ==================================\n+\n+These values correspond to the ``qseqid``/``sseqid``, ``qlen``/``slen``,\n+``qcovhsp``, ``length``, ``pident`` and ``bitscore`` values in the BLAST+\n+tabular output.\n+\n+For the alignment length, bitscore and percentage identity the values for\n+*A vs B* and *B vs A* are typically the same, so their minimum is shown.\n+The coverage values are given by the HSP alignment length divided by the\n+sequence length (adjusted by a factor of three for TBLASTX).\n+\n+Note that if a sequence has equally scoring top BLAST matches to multiple\n+sequence in the other file, it will not be considered for an RBH. This\n+can happen following gene duplication, or for (near) identical gene\n+duplicates.\n+\n+.. class:: warningmark\n+\n+**Note**\n+\n+If you are trying to use BLAST RBH matches to identify candidate orthologues\n+or transfer annotation, you *must* use a percentage identity and minimum\n+coverage threshold or similiar. See:\n+\n+Punta and Ofran (2008) The Rough Guide to In Silico Function Prediction,\n+or How To Use Sequence and Structure Information To Predict Protein\n+Function. PLoS Comput Biol 4(10): e1000160.\n+http://dx.doi.org/10.1371/journal.pcbi.1000160\n+\n+The defaults are to require 70% sequence identity over the aligned region\n+(using ``pident`` in the BLAST+ tabular output), and that the HSP alignment\n+covers at least 50% of the query sequence (using ``qcovhsp`` in the BLAST+\n+tabular output).\n+\n+\n+**References**\n+\n+A specific paper covering this tool is planned, but please also cite:\n+\n+Christiam Camacho et al. (2009).\n+BLAST+: architecture and applications.\n+BMC Bioinformatics. 15;10:421.\n+http://dx.doi.org/10.1186/1471-2105-10-421\n+\n+This wrapper is available to install into other Galaxy Instances via the Galaxy\n+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/blast_rbh\n+ </help>\n+ <citations>\n+ <citation type="doi">10.1186/1471-2105-10-421</citation>\n+ <!-- TODO: Add BibTeX entry / preprint DOI for Galaxy BLAST+ paper -->\n+ </citations>\n+</tool>\n' |
b |
diff -r 000000000000 -r b828ca44a313 tools/blast_rbh/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/blast_rbh/tool_dependencies.xml Mon Aug 04 08:13:39 2014 -0400 |
b |
@@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="blast+" version="2.2.29"> + <repository changeset_revision="a2ec897aac2c" name="package_blast_plus_2_2_29" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency> |