changeset 0:b828ca44a313 draft

Uploaded v0.1.2 (previously only on the Test Tool Shed)
author peterjc
date Mon, 04 Aug 2014 08:13:39 -0400
parents
children ff0b814c1320
files test-data/four_human_proteins.fasta test-data/k12_edited_proteins.fasta test-data/k12_ten_proteins.fasta test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular test-data/rbh_blastp_k12.tabular test-data/rbh_blastp_k12_self.tabular test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular test-data/rbh_none.tabular test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular test-data/rhodopsin_nucs.fasta test-data/rhodopsin_proteins.fasta test-data/three_human_mRNA.fasta tools/blast_rbh/README.rst tools/blast_rbh/blast_rbh.py tools/blast_rbh/blast_rbh.xml tools/blast_rbh/tool_dependencies.xml
diffstat 17 files changed, 1211 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,61 @@
+>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+MHPAVFLSLPDLRCSLLLLVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
+SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
+REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
+VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
+CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
+HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
+>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+MKKFSRMPKSEGGSGGGAAGGGAGGAGAGAGCGSGGSSVGVRVFAVGRHQVTLEESLAEG
+GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
+DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
+LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
+DPEHRPDIFQVSYFAFKFAKKDCPVSNINNSSIPSALPEPMTASEAAARKSQIKARITDT
+IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
+ILLGQGPPQQPPQQHRVLQQLQQGDWRLQQLHLQHRHPHQQQQQQQQQQQQQQQQQQQQQ
+QQQQQQHHHHHHHHLLQDAYMQQYQHATQQQQMLQQQFLMHSVYQPQPSASQYPTMMPQY
+QQAFFQQQMLAQHQPSQQQASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
+ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTEEELLDREFDLLRSNRLEERASSD
+KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNDESESDFESDPPSPKSSEEEEQDDEEVLQGEQGDFNDDDTEPE
+NLGHRPLLMDSEDEEEEEKHSSDSDYEQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
+QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
+APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHHGTPTSTKKTLKPTYRTPERARR
+HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
+WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
+SQQSQPVELDPFGAAPFPSKQ
+>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
+VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
+ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
+TVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETL
+EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
+NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
+DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
+RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAF
+PNTSSTSVPTSPEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
+SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
+SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
+EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
+FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
+AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPESEELEME
+FEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
+PS
+>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
+VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
+GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
+EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES
+ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
+YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/k12_edited_proteins.fasta	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,69 @@
+>gi|16127995|ref|NP_414542.1| thr operon leader peptide [Escherichia coli str. K-12 substr. MG1655]
+MKRISTTITTTITITTGNGAG
+>gi|16127996|ref|NP_414543.1| fused aspartokinase I and homoserine dehydrogenase I [Escherichia coli str. K-12 substr. MG1655]
+MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERI
+FAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEA
+RGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYS
+AAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPC
+LIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLIT
+QSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL
+ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSW
+LKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAV
+ADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELM
+KFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIE
+IEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFK
+VKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV
+>gi|16127997|ref|NP_414544.1| homoserine kinase [Escherichia coli str. K-12 substr. MG1655]
+MVKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAETFSLNNLGRFADKLPSEPRENIVYQCWE
+RFCQELGKQIPVAMTLEKNMPIGSGLGSSACSVVAALMAMNEHCGKPLNDTRLLALMGELEGRISGSIHY
+DNVAPCFLGGMQLMIEENDIISQQVPGFDEWLWVLAYPGIKVSTAEARAILPAQYRRQDCIAHGRHLAGF
+IHACYSRQPELAAKLMKDVIAEPYRERLLPGFRQARQAVAEIGAVASGISGSGPTLFALCDKPETAQRVA
+DWLGKNYLQNQEGFVHICRLDTAGARVLEN
+>NP_414544_near_copy
+MKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAETFSLNNLGRFADKLPSEPRENIVYQCWE
+RFCQELGKQIPVAMTLEKNMPIGSGLGSSACSVVAALMAMNEHCGKPLNDTRLLALMGELEGRISGSIHY
+DNVAPCFLGGMQLMIEENDIISQQVPGFDEWLWVLAYPGIKVSTAEARAILPAQYRRQDCIAHGRHLAGF
+IHACYSRQPELAAKLMKDVIAEPYRERLLPGFRQARQAVAEIGAVASGISGSGPTLFALCDKPETAQRVA
+DWLGKNYLQNQEGFVHICRLDTAGARVLEN
+>gi|16127998|ref|NP_414545.1| threonine synthase [Escherichia coli str. K-12 substr. MG1655]
+MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILSAFIGDEIPQE
+ILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAA
+VAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNS
+ANSINISRLLAQICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVP
+RFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTS
+EPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAAL
+RKLMMNHQ
+>NP_414546_near_copy_1
+MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDHGWWKQHYEWRGNRWHL
+HGPPPPPRHHKKAPHDHHGGHGPGKHHRV
+>NP_414546_near_copy_2
+MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDHGWWKQHYEWRGNRWHL
+HGPPPPPRHHKKAPHDHHGGHGPGKHHRRI
+>gi|16128000|ref|NP_414547.1| peroxide resistance protein, lowers intracellular iron [Escherichia coli str. K-12 substr. MG1655]
+MLILISPAKTLDYQSPLTTTRYTLPELLDNSQQLIHEARKLTPPQISTLMRISDKLAGINAARFHDWQPD
+FTPANARQAILAFKGDVYTGLQAETFSEDDFDFAQQHLRMLSGLYGVLRPLDLMQPYRLEMGIRLENARG
+KDLYQFWGDIITNKLNEALAAQGDNVVINLASDEYFKSVKPKKLNAEIIKPVFLDEKNGKFKIISFYAKK
+ARGLMSRFIIENRLTKPEQLTGFNSEGYFFDEDSSSNGELVFKRYEQR
+>gi|16128001|ref|NP_414548.1| putative transporter [Escherichia coli str. K-12 substr. MG1655]
+MPDFFSFINSVLWGSVMIYLLFGAGCWFTFRTGFVQFRYIRQFGKSLKNSIHPQPGGLTSFQSLCTSLAA
+RVGSGNLAGVALAITAGGPGAVFWMWVAAFIGMATSFAECSLAQLYKERDVNGQFRGGPAWYMARGLGMR
+WMGVLFAVFLLIAYGIIFSGVQANAVARALSFSFDFPPLVTGIILAVFTLLAITRGLHGVARLMQGFVPL
+MAIIWVLTSLVICVMNIGQLPHVIWSIFESAFGWQEAAGGAAGYTLSQAITNGFQRSMFSNEAGMGSTPN
+AAAAAASWPPHPAAQGIVQMIGIFIDTLVICTASAMLILLAGNGTTYMPLEGIQLIQKAMRVLMGSWGAE
+FVTLVVILFAFSSIVANYIYAENNLFFLRLNNPKAIWCLRICTFATVIGGTLLSLPLMWQLADIIMACMA
+ITNLTAILLLSPVVHTIASDYLRQRKLGVRPVFDPLRYPDIGRQLSPDAWDDVSQE
+>gi|16128002|ref|NP_414549.1| transaldolase B [Escherichia coli str. K-12 substr. MG1655]
+MTDKLTSLRQYTTVVADTGDIAAMKLYQPQDATTNPSLILNAAQIPEYRKLIDDAVAWAKQQSNDRAQQI
+VDATDKLAVNIGLEILKLVPGRISTEVDARLSYDTEASIAKAKRLIKLYNDAGISNDRILIKLASTWQGI
+RAAEQLEKEGINCNLTLLFSFAQARACAEAGVFLISPFVGRILDWYKANTDKKEYAPAEDPGVVSVSEIY
+QYYKEHGYETVVMGASFRNIGEILELAGCDRLTIAPALLKELAESEGAIERKLSYTGEVKARPARITESE
+FLWQHNQDPMAVDKLAEGIRKFAIDQEKLEKMIGDLL
+>gi|16128003|ref|NP_414550.1| molybdochelatase incorporating molybdenum into molybdopterin [Escherichia coli str. K-12 substr. MG1655]
+MNTLRIGLVSISDRASSGVYQDKGIPALEEWLTSALTTPFELETRLIPDEQAIIEQTLCELVDEMSCHLV
+LTTGGTGPARRDVTPDATLAVADREMPGFGEQMRQISLHFVPTAILSRQVGVIRKQALILNLPGQPKSIK
+ETLEGVKDAEGNVVVHGIFASVPYCIQLLEGPYVETAPEVVAAFRPKSARRDVSE
+>gi|16128004|ref|NP_414551.1| inner membrane protein, Grp1_Fun34_YaaH family [Escherichia coli str. K-12 substr. MG1655]
+MGNTKLANPAPLGLMGFGMTTILLNLHNVGYFALDGIILAMGIFYGGIAQIFAGLLEYKKGNTFGLTAFT
+SYGSFWLTLVAILLMPKLGLTDAPNAQFLGVYLGLWGVFTLFMFFGTLKGARVLQFVFFSLTVLFALLAI
+GNIAGNAAIIHFAGWIGLICGASAIYLAMGEVLNEQFGRTVLPIGESH
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/k12_ten_proteins.fasta	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,60 @@
+>gi|16127995|ref|NP_414542.1| thr operon leader peptide [Escherichia coli str. K-12 substr. MG1655]
+MKRISTTITTTITITTGNGAG
+>gi|16127996|ref|NP_414543.1| fused aspartokinase I and homoserine dehydrogenase I [Escherichia coli str. K-12 substr. MG1655]
+MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERI
+FAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEA
+RGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYS
+AAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPC
+LIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLIT
+QSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL
+ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSW
+LKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAV
+ADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELM
+KFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIE
+IEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFK
+VKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV
+>gi|16127997|ref|NP_414544.1| homoserine kinase [Escherichia coli str. K-12 substr. MG1655]
+MVKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAETFSLNNLGRFADKLPSEPRENIVYQCWE
+RFCQELGKQIPVAMTLEKNMPIGSGLGSSACSVVAALMAMNEHCGKPLNDTRLLALMGELEGRISGSIHY
+DNVAPCFLGGMQLMIEENDIISQQVPGFDEWLWVLAYPGIKVSTAEARAILPAQYRRQDCIAHGRHLAGF
+IHACYSRQPELAAKLMKDVIAEPYRERLLPGFRQARQAVAEIGAVASGISGSGPTLFALCDKPETAQRVA
+DWLGKNYLQNQEGFVHICRLDTAGARVLEN
+>gi|16127998|ref|NP_414545.1| threonine synthase [Escherichia coli str. K-12 substr. MG1655]
+MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILSAFIGDEIPQE
+ILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAA
+VAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNS
+ANSINISRLLAQICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVP
+RFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTS
+EPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAAL
+RKLMMNHQ
+>gi|16127999|ref|NP_414546.1| hypothetical protein b0005 [Escherichia coli str. K-12 substr. MG1655]
+MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDHGWWKQHYEWRGNRWHL
+HGPPPPPRHHKKAPHDHHGGHGPGKHHR
+>gi|16128000|ref|NP_414547.1| peroxide resistance protein, lowers intracellular iron [Escherichia coli str. K-12 substr. MG1655]
+MLILISPAKTLDYQSPLTTTRYTLPELLDNSQQLIHEARKLTPPQISTLMRISDKLAGINAARFHDWQPD
+FTPANARQAILAFKGDVYTGLQAETFSEDDFDFAQQHLRMLSGLYGVLRPLDLMQPYRLEMGIRLENARG
+KDLYQFWGDIITNKLNEALAAQGDNVVINLASDEYFKSVKPKKLNAEIIKPVFLDEKNGKFKIISFYAKK
+ARGLMSRFIIENRLTKPEQLTGFNSEGYFFDEDSSSNGELVFKRYEQR
+>gi|16128001|ref|NP_414548.1| putative transporter [Escherichia coli str. K-12 substr. MG1655]
+MPDFFSFINSVLWGSVMIYLLFGAGCWFTFRTGFVQFRYIRQFGKSLKNSIHPQPGGLTSFQSLCTSLAA
+RVGSGNLAGVALAITAGGPGAVFWMWVAAFIGMATSFAECSLAQLYKERDVNGQFRGGPAWYMARGLGMR
+WMGVLFAVFLLIAYGIIFSGVQANAVARALSFSFDFPPLVTGIILAVFTLLAITRGLHGVARLMQGFVPL
+MAIIWVLTSLVICVMNIGQLPHVIWSIFESAFGWQEAAGGAAGYTLSQAITNGFQRSMFSNEAGMGSTPN
+AAAAAASWPPHPAAQGIVQMIGIFIDTLVICTASAMLILLAGNGTTYMPLEGIQLIQKAMRVLMGSWGAE
+FVTLVVILFAFSSIVANYIYAENNLFFLRLNNPKAIWCLRICTFATVIGGTLLSLPLMWQLADIIMACMA
+ITNLTAILLLSPVVHTIASDYLRQRKLGVRPVFDPLRYPDIGRQLSPDAWDDVSQE
+>gi|16128002|ref|NP_414549.1| transaldolase B [Escherichia coli str. K-12 substr. MG1655]
+MTDKLTSLRQYTTVVADTGDIAAMKLYQPQDATTNPSLILNAAQIPEYRKLIDDAVAWAKQQSNDRAQQI
+VDATDKLAVNIGLEILKLVPGRISTEVDARLSYDTEASIAKAKRLIKLYNDAGISNDRILIKLASTWQGI
+RAAEQLEKEGINCNLTLLFSFAQARACAEAGVFLISPFVGRILDWYKANTDKKEYAPAEDPGVVSVSEIY
+QYYKEHGYETVVMGASFRNIGEILELAGCDRLTIAPALLKELAESEGAIERKLSYTGEVKARPARITESE
+FLWQHNQDPMAVDKLAEGIRKFAIDQEKLEKMIGDLL
+>gi|16128003|ref|NP_414550.1| molybdochelatase incorporating molybdenum into molybdopterin [Escherichia coli str. K-12 substr. MG1655]
+MNTLRIGLVSISDRASSGVYQDKGIPALEEWLTSALTTPFELETRLIPDEQAIIEQTLCELVDEMSCHLV
+LTTGGTGPARRDVTPDATLAVADREMPGFGEQMRQISLHFVPTAILSRQVGVIRKQALILNLPGQPKSIK
+ETLEGVKDAEGNVVVHGIFASVPYCIQLLEGPYVETAPEVVAAFRPKSARRDVSE
+>gi|16128004|ref|NP_414551.1| inner membrane protein, Grp1_Fun34_YaaH family [Escherichia coli str. K-12 substr. MG1655]
+MGNTKLANPAPLGLMGFGMTTILLNLHNVGYFALDGIILAMGIFYGGIAQIFAGLLEYKKGNTFGLTAFT
+SYGSFWLTLVAILLMPKLGLTDAPNAQFLGVYLGLWGVFTLFMFFGTLKGARVLQFVFFSLTVLFALLAI
+GNIAGNAAIIHFAGWIGLICGASAIYLAMGEVLNEQFGRTVLPIGESH
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,2 @@
+#A_id	B_id	A_length	B_length	A_qcovhsp	B_qcovhsp	length	pident	bitscore
+ENA|BC112106|BC112106.1	gi|57163782|ref|NM_001009242.1|	1213	1047	86	100	1047	92.07	 1514
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,2 @@
+#A_id	B_id	A_length	B_length	A_qcovhsp	B_qcovhsp	length	pident	bitscore
+sp|P08100|OPSD_HUMAN	gi|57163783|ref|NP_001009242.1|	348	348	100	100	348	96.55	  701
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rbh_blastp_k12.tabular	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,10 @@
+#A_id	B_id	A_length	B_length	A_qcovhsp	B_qcovhsp	length	pident	bitscore
+gi|16127995|ref|NP_414542.1|	gi|16127995|ref|NP_414542.1|	21	21	100	100	21	100.00	38.1
+gi|16127996|ref|NP_414543.1|	gi|16127996|ref|NP_414543.1|	820	820	100	100	820	100.00	 1687
+gi|16127997|ref|NP_414544.1|	gi|16127997|ref|NP_414544.1|	310	310	100	100	310	100.00	  642
+gi|16127998|ref|NP_414545.1|	gi|16127998|ref|NP_414545.1|	428	428	100	100	428	100.00	  882
+gi|16128000|ref|NP_414547.1|	gi|16128000|ref|NP_414547.1|	258	258	100	100	258	100.00	  531
+gi|16128001|ref|NP_414548.1|	gi|16128001|ref|NP_414548.1|	476	476	100	100	476	100.00	  959
+gi|16128002|ref|NP_414549.1|	gi|16128002|ref|NP_414549.1|	317	317	100	100	317	100.00	  648
+gi|16128003|ref|NP_414550.1|	gi|16128003|ref|NP_414550.1|	195	195	100	100	195	100.00	  397
+gi|16128004|ref|NP_414551.1|	gi|16128004|ref|NP_414551.1|	188	188	100	100	188	100.00	  365
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rbh_blastp_k12_self.tabular	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,5 @@
+#A_id	B_id	A_length	B_length	A_qcovhsp	B_qcovhsp	length	pident	bitscore
+gi|16127997|ref|NP_414544.1|	NP_414544_near_copy	310	309	99	100	309	99.68	  638
+NP_414544_near_copy	gi|16127997|ref|NP_414544.1|	309	310	100	99	309	99.68	  638
+NP_414546_near_copy_1	NP_414546_near_copy_2	99	100	99	98	98	100.00	  197
+NP_414546_near_copy_2	NP_414546_near_copy_1	100	99	98	99	98	100.00	  197
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,2 @@
+#A_id	B_id	A_length	B_length	A_qcovhsp	B_qcovhsp	length	pident	bitscore
+gi|57163782|ref|NM_001009242.1|	ENA|BC112106|BC112106.1	1047	1213	100	86	1047	92.07	 1474
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rbh_none.tabular	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,1 @@
+#A_id	B_id	A_length	B_length	A_qcovhsp	B_qcovhsp	length	pident	bitscore
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,2 @@
+#A_id	B_id	A_length	B_length	A_qcovhsp	B_qcovhsp	length	pident	bitscore
+gi|57163782|ref|NM_001009242.1|	ENA|BC112106|BC112106.1	1047	1213	66	57	230	97.39	  559
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rhodopsin_nucs.fasta	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,161 @@
+>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA
+ATGAACGGGACGGAGGGCCCGAACTTCTACGTGCCCTTCTCCAACAAAACGGGTGTGGTACGCAGCCCCT
+TCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTCCTGCT
+CATCGTGCTTGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACGGTCCAGCACAAGAAGCTGCGCACG
+CCTCTCAACTACATCCTGCTCAACCTGGCCGTGGCTGACCTCTTCATGGTCTTCGGTGGCTTCACCACCA
+CCCTCTACACCTCTCTGCATGGATACTTTGTCTTTGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGC
+CACACTGGGCGGTGAAATTGCCCTGTGGTCTTTGGTGGTCCTGGCCATTGAGCGGTACGTGGTGGTGTGT
+AAGCCCATGAGCAACTTCCGCTTTGGGGAGAACCATGCCATAATGGGCGTCGCTTTCACCTGGGTCATGG
+CACTGGCCTGCGCTGCACCCCCCCTCGTTGGTTGGTCCAGGTACATCCCTGAAGGCATGCAGTGTTCATG
+CGGGATCGACTACTACACACTCAAGCCAGAAGTCAACAACGAGTCCTTTGTCATCTACATGTTCGTGGTC
+CACTTCACCATCCCCATGATCGTCATCTTCTTTTGCTACGGGCAGCTTGTCTTCACAGTCAAGGAGGCGG
+CAGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCTGAGAAGGAGGTCACTCGCATGGTCATCATCAT
+GGTCATTGCTTTCCTGATCTGTTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGG
+TCCAACTTTGGCCCCATCTTCATGACACTCCCGGCGTTCTTCGCAAAGTCCTCCTCCATCTACAACCCTG
+TCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCC
+ACTGGGTGATGACGAGGCTTCCACAACCGGTTCCAAGACGGAGACCAGCCAGGTGGCACCGGCCTAA
+
+>gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds
+TCTTTCTAGTTTGGGGGGGGGGACTTTAAAGAGCCGCCAATATGAACGGAACAGAAGGCCCAAACTTTTA
+CATACCCATGTCCAACAAGACTGGGGTGGTGCGAAGCCCCTTTGAATACCCTCAGTATTACCTGGCAGAG
+CCATGGCAATATTCCATTCTGTGCGCGTACATGTTCCTGCTCATTCTACTTGGGTTCCCAATCAACTTCA
+TGACCTTGTACGTCACCATCCAGCACAAGAAGCTCCGGACACCCTTAAACTATATCCTGCTGAATTTGGC
+CTTTGCCAACCACTTCATGGTCCTGTGTGGATTCACGGTGACAATGTACTCCTCAATGAACGGATACTTC
+ATCCTCGGAGCCACCGGTTGCTATGTTGAAGGCTTCTTCGCTACCCTTGGTGGTGAAATCGCCCTTTGGT
+CCCTGGTGGTCTTGGCCATTGAACGATACGTGGTCGTCTGTAAGCCCATGAGCAACTTCCGATTTAGTGA
+GAACCATGCCGTCATGGGCGTAGCGTTCACCTGGATAATGGCTTTGTCCTGTGCTGTTCCTCCACTCCTT
+GGATGGTCCAGGTACATCCCCGAGGGCATGCAGTGCTCCTGCGGAGTCGACTACTACACCCTGAAGCCCG
+AGGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTCGTCCACTTCACCATCCCCCTGATTATCATTTT
+CTTCTGCTATGGCCGCCTGGTGTGCACTGTGAAAGAGGCTGCAGCTCAACAGCAAGAGTCCGCCACCACC
+CAGAAGGCCGAGAAAGAGGTGACCAGGATGGTGATCATCATGGTGGTCTTCTTCCTTATCTGTTGGGTCC
+CCTACGCCTCTGTCGCTTTCTTCATCTTCAGCAATCAGGGCTCTGAGTTCGGCCCCATCTTCATGACCGT
+CCCAGCTTTCTTTGCCAAGAGTTCTTCCATCTACAACCCCGTCATCTACATCATGCTCAACAAGCAGTTC
+CGTAACTGCATGATCACCACCCTGTGCTGCGGCAAGAATCCCTTTGGAGAAGACGATGCCTCCTCTGCCG
+CCACCTCCAAGACAGAGGCTTCTTCTGTTTCTTCCAGCCAGGTGTCTCCTGCATAAGACCTTCCACCAGG
+CCTGTCTCAGGGTCCGCTGCCTCACACAGCTCCCACCGCCCCAACTCCGTCTCCTGCTCGCTAAGGCGGC
+GAAGTTCCCCTTCCATTACATAAAACGTATCTGTTCAAGAAAGGCGACGACGAAGGAGAAGAAGAGGAGC
+CCCCCCGAACCCCTTCGCTGCTGCTGAAAACGACTTGATTGCTTCTGCAACGCAACGGGGCCTTACGGCA
+GCGAAGGGGTTGTCATCCGGACGCGCCAAGAATTCCTTCGAGACTGTAAATATCTTAAAGGAACCGTCCT
+GCTAGTTACCGACGCCGCTCCTGTAGCCGCCGTTCCCCCGCACTCCGGCCGGTTCATACCTCTTATTTTT
+TTGCAATGCAACAGAAAATAATATTTTTGTTCCCACGGCTTTTCCCGGTCAGGTCTGGTAGTGGCGGAGA
+TTGGCCGACCCCTCGCACCTGTAATAAAGCGCAG
+
+>gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds
+GTGCCCTTCTCCAACAAGACAGGCGTGGTGCGCAGTCCCTTCGAGCATCCACAGTACTACCTGGCCGAGC
+CATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTCGGCTTCCCCATCAACTTCCT
+CACGCTCTATGTCACGGTTCAGCACAAGAAGCTGCGTACGCCTCTCAACTACATCCTGCTCAACCTGGCC
+GTGGCCGACCTCTTCATGGTCTTCGGAGGCTTCACCACCACCCTCTACACCTCCCTGCATGGATACTTTG
+TCTTCGGGCCTACGGGATGCAATCTGGAGGGCTTTTTTGCCACCCTGGGAGGTATGAGCTGAGATGCGGG
+TAAGGAGGAGGCATAGAGGCATCTGGGAACAGTCCCAAGCTTGGGGTGAAGGCTAAGAGGCCTTCTTCCT
+TGTTCTGTCATTGGCGTCGTCCGAAGCCCTCACTTAATCAACAAACAGTTTGGTGGTGAGGCGCTGAGCT
+CCATTTGGAGAGGGCAGGTATCGAGCACTGTTTTATCCCCCCTGGAGTGGTGCCATTGCCTTGCTTTACA
+GCAAAGAAACTGAGGATGAGAGGAGTCGAGGGTCTTGCCAGGTCACATCATGGCAGAGACAGAGCTGAGT
+TTCAACCCTGCATCTATGTGCAGTTTCCCTTGGAGCAGCTATGTTAGGTCAGACCCACGGTGGGCACTGG
+GGAGAGAGCTGCACAAGACAGGTCCCTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTCCTGATTGCCA
+GGAGTGATGTGCAGCGCAAATGTCTGAATTCCATTATTATGTGCTCCTTCTTCCTCTGAGCCAAACATCC
+ATCTTCATGGCTCCTAGAATTGGGTCCCACCCACATGAGCAGGTCATTTTGTTTCCCTAGAGGGGAGAGG
+TCACTGCTGTGGAGGGAGGGAAGGTTCGTCCCGCTCCATGTTTCTGTTGTCTCTGCAATGCCTTTCTCTA
+GGGACTCTGCCTATTGCCCCAAGAAGGACACATTCTTCTGTAAAAACTCCCTCCTGGGTTCCCAGTCTAA
+TCAAGACCTCTAAACTGATTTCCATGTCCCTCATGAACCCAAAGCTCTAACTGAATTAAACTTCTCAGGA
+CTTACTCCACTCTCCTCGTCCATCATGCAGCCCCTCTGCCCAGCACCCTATCTCCTCTTCTTCCCAGTGT
+CTGAGCCCACTGTACCCTGAGACTTCGCTCCAGGCCTGCCCCAGGCTGCCTTCTCAGGTGCCCTCTCCCA
+CATAGGAGGAGCACGGCCTCCTTAGACAGACGTGGGGTGCAGGTTGGTGGCATGCTGACTGATAGCTGAC
+TGCCTTGCAGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTATGCA
+AGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCCTTGCCCTCACCTGGGTCATGGC
+ACTGGCCTGCGCCGCGCCCCCGCTAGTCGGCTGGTCCAGGTAATGGCACTGAACAGAAGGGAAGTGCCTC
+TGAGGTCTTCTTAGGGTCCCCCAGCTGGGACTCAAACCTAGGGCTGTCTGGTTCCAGGCACGGAACTGGC
+GACTCCACTGGGGTTGGGGTTTAGGGCAAGGAAGGAGAGGATCAGACCCTAATGTTGTTACGTGGGTTGG
+TCCGCATGTCAAGGAGAATCCAAGACACCCAATCCTTCACCTTGGCTGTGCCCCTAATCCTCATCTAAGC
+CAGGTTCAGATTCCAATCCTCTTTGGCCCAGTGCTCCGTGGGAAGCTCCCTCTGACCTTGGGCCTCAGCG
+CCTGGGGTTGCTGAGCCTTCCTAGTATAGGTGGTGACATCGTAGCCCCTGGGACCTGGATCCTGCCCAGT
+CTGCAGGCCATCATCTCCAAATGGGGCTGAGATGAGATGTGAGGAAAGAGGGGAGACAGTGGTTTGGAAA
+ACTGGACTGGTGGCTTTTTTGGGTTTCCAGAGGACTCATCTTCCTCTGCTTCTAGAATATTCCCACTCTC
+TCTTCCCTTTCCTCATTCTTCCTGGGTTATTTTTTTTTCCCTTTGCTGAATTCGAGCCCCATTCCCTCCA
+GCCTCTTTCCCTGTCTTATCTAGCCCAGTCCAGTTATATTCTCATAGGCAGAGGCAACAGATGCTCCAAA
+TTTTCTGAGGTCGGTTCCAACATCGCCACCCTCTAAAATCAGTGAAACATCCTAACTACATGCCTCATAG
+TCCTCCTGTTTCCAAAAACTGCAAAGATCTCCTGGTTACCCTGTATGCCCATCTTTGGGCTAGAAAATCC
+TCTCACCCTGTTAATAGTAAGACCCTGGTTTGTACAAACTGCCTCAAACACAGAGTTTAGGGGCTTTTCC
+CTTCTCTCCGCCAACCTCTGACAGGCAGAGTCTGAGGCCTGGCCTCCAGCTGCTGCGGGGAGCAGGTCTG
+GTAAAGAATCCTGTGCAGGTCAGTGGTATACAGGTCCTGTCAGGTGACAGCCTGGGCGAGAGACTGGAAA
+GTATCAGGATAACACGGCTGCCAGACGAACAACAAAACAACACTGAATTCACAAGGCGCATTCGAATCCT
+CTCTCAGTCCATTTGATCCTCAGTCACACAGCCGAGTAGACACTTTATCAACTCATTTAACAGAAAGGGA
+AAGTGAAGCCCAGAGCGAGGCCAGCAACGTGGCAGGTCACTCTGGTCATCTAGGGCCTGTTCCCAACTCT
+TTCACATGTGGGTCTCCAATATGTTCCCTCCTGTCCCAATCTCTGCCGGCCCTCAGGTACATCCCAGAGG
+GCATGCAGTGCTCATGTGGAATCGACTACTACACCCTCAAGCCGGAGGTCAACAACGAGTCCTTTGTCAT
+CTACATGTTCGTGGTCCACTTCACCATCCCTATGATTGTCATATTCTTTTGCTATGGACAGCTGGTCTTC
+ACCGTCAAGGAGGTAAGGTCATGTGTTGGGCACTGGGGACATGCACACTGAGTGAATGGAGCCCAGCTCC
+ATTCCCAGAGTTGCCACAGTCTGGACACCTGACCTTGTGTCCCTGCAGGCAGCTGCCCAGCAGCAGGAGT
+CAGCCACCACCCAGAAGGCCGAGAAGGAGGTCACCCGTATGGTCATCATCATGGTCATTGCTTTCCTAAT
+CTGTTGGCTGCCGTATGCCGGCGTGGCATTCTACATCTTCACCCACCAGGGCTCTAACTTTGGCCCCATC
+TTCATGACCCTCCCGGCATTCTTTGCCAAGTCGTCCTCCATCTACAACCCTGTCATCTATATCATGATGA
+ACAAGCAGGTGCCAGGTGGTAGGGAGGGAGGGTCTGGGTCCCCCAGGCTGCAGGCACTGCCCACAGAGGA
+CAAGCCACATCCTTGACTAGGCAGACCCCAGTCTTCCCATCTGCAAAATTAGGCAGGGGAGTTCGTCTCC
+CCCAGGCATCAGAGACATCGGGGAGAAATGCACATTTCTGGAGATGAATCAGCATCTCAGGGTGGGCCCA
+GGAACCTGCACTTCTAAAAACCATTCCACATGACTCTGAGGCTAGCATGAGAAGTGATGATCCACATGGT
+TCTGGAGGCCTGCTTTAAAAGTCAAGTGGTCAAAGTCCCAAGCCTGGGAACGGGATGGTGCCAGTCTCCA
+TTAAAGAGATCAAAAGGAGCTAGAAAGTCTTGTGATGAAAGATGAAGGGATAAAGCCGTCCTTTAACACA
+GATCAGTGATTTCTCTGCAGAATCCATGACCCAGTGGGAAAAAGTGGTCCCTGGAGTCAGGCATATTGGA
+TTCAAATCCTAGCTCTGCTATTTTCTAGCTATGTAACCTTGGGCAAGTCATCTCCCTTCTCTGTGCTTCA
+GTTTCTTCTTTCATAGAAAGGGTAAAATCCCAAACTCTTGGGTTAAATGAGATAACTTACATAGCCCTTG
+ATATGCAGAGGCATTATGGAATGTCGTTAGTGACAAAGTTCCCTTGGGTTTGGTCCCTGGTATCTCTGGA
+GTGAGATTGCATATGTTCCCTTCAGAGGGTCAGATTTGGGATGAGAGTGGAGGCTGCGAGGGCCTGAGTG
+GGAAGGGATTGGAGGCAAATCTCACCAACCATGTCAGTTTGCTACACACACTTTGGGTGGACCCTGACCC
+TGACTCATGCTTCTTGCCTTCCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCCAC
+TGGGTGACGATGAGGCCTCCACCACTGCCTC
+
+>gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds
+GTGCCCTTCTCCAACAAGACGGGTGTGGTGCGCAGCCCCTTCGAGTACCCGCAGTACTACCTGGCTGAGC
+CCTGGCAGTTCTCCATGCTGGCTGCCTACATGTTTCTGCTGATCGTGCTCGGATTCCCCATCAACTTCCT
+CACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTGGCT
+GTGGCCAACCTCTTCATGGTCTTTGGAGGCTTCACCACCACCCTGTATACCTCTATGCATGGATACTTCG
+TCTTCGGGGCCACGGGATGCAATCTGGAGGGCTTCTTTGCCACGCTGGGCGGTGAAATCGCCCTGTGGTC
+CCTGGTGGTCCTGGCCATCGAGCGGTATGTGGTGGTCTGCAAGCCCATGAGCAACTTCCGCTTTGGGGAG
+AACCACGCCATCATGGGCCTCGCCTTCACGTGGGTCATGGCACTGGCCTGCGCTGCACCCCCACTAGCCG
+GCTGGTCCAGGTACATCCCAGAGGGCATGCAGTGCTCGTGTGGGATTGACTACTACACGCTCAAACCGGA
+GGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTGTCATTTTC
+TTCTGCTACGGACAGCTGGTGTTCACAGTGAAGGAGGCGGCTGCCCAGCAGCAGGAGTCAGCCACCACCC
+AGAAGGCCGAGAAGGAAGTCACGCGCATGGTCATCATCATGGTCGTTGCGTTCCTAATCTGTTGGCTGCC
+CTACGCCAGCGTGGCATTCTACATCTTTACCCACCAGGGCTCTAACTTTGGCCCTGTCTTCATGACCATC
+CCGGCATTCTTCGCCAAGTCATCCTCCATCTACAACCCGGTCATCTATATCATGATGAACAAGCAGTTCC
+GGAACTGCATGCTCACCACCCTCTGCTGTGGCAAGAACCCACTGGGTGATGACGAAGCATCCACCACTGC
+CTC
+
+>gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds
+ATGAACGGGACCGAGGGCCCAAACTTCTACGTGCCTTTCTCCAACAAGACGGGCGTCGTACGCAGCCCCT
+TCGAGGCGCCGCAGTACTACCTGGCTGAGCCATGGCAGTTCAGCATGCTGGCCGCCTACATGTTCCTGCT
+GATCATGCTTGGCTTCCCCATCAACTTCCTCACGCTGTACGTCACAGTCCAGCACAAGAAGCTGAGGACC
+CCCCTCAACTACATCCTGCTCAACCTGGCCGTGGCAGATCTCTTCATGGTGTTCGGGGGCTTCACCACCA
+CCCTGTATACCTCTCTGCACGGGTACTTCGTGTTCGGTCCGACGGGCTGCAACCTCGAGGGCTTCTTTGC
+CACCTTAGGCGGTGAAATTGCACTGTGGTCCTTGGTGGTGCTAGCCATCGAGCGGTACGTAGTGGTGTGC
+AAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCGTCGCATTCACCTGGGTCATGG
+CTCTGGCCTGTGCGGCCCCCCCCCTCGTCGGCTGGTCTAGATACATCCCGGAGGGGATGCAGTGCTCGTG
+CGGGATCGATTACTACACGCCCCACGAGGAGACCAACAATGAGTCGTTCGTCATCTACATGTTCGTTGTA
+CACTTCATCATCCCCCTGATTGTCATATTCTTCTGCTACGGGCAGCTGGTCTTCACCGTCAAGGAGGCTG
+CAGCCCAGCAGCAGGAGTCGGCCACCACTCAGAAGGCCGAGAAGGAGGTCACGCGTATGGTCATCATCAT
+GGTCATCGCTTTCCTCATATGCTGGCTGCCCTACGCAGGTGTGGCGTTCTACATCTTCACCCATCAGGGA
+TCCGACTTTGGCCCCATCTTCATGACCATCCCGGCTTTCTTTGCCAAGACGTCTGCCGTCTATAACCCCG
+TCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGGTCACCACTCTCTGCTGTGGCAAGAACCC
+CCTAGGTGACGACGAGGCCTCCACGACCGTGTCCAAGACAGAGACCAGCCAAGTGGCCCCTGCCTAA
+
+>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds
+CCGCTACTGACGAACCGCAACCATGAACGGCACTGAGGGACCTAACTTCTACATCCCCATGTCAAACGCC
+ACTGGTGTAGTGAGGAGTCCATTTGAATACCCGCAGTACTACCTTGCAGAACCATGGGCTTTCTCAGCTC
+TGTCTGCCTACATGTTCTTCCTGATTATCGCCGGATTCCCCATCAACTTCCTCACCCTGTATGTCACCAT
+CGAACATAAGAAACTGAGGACCCCACTGAACTACATTCTGCTGAACCTGGCCGTGGCCGACCTCTTCATG
+GTGTTTGGCGGATTCACCACCACGATGTACACCTCCATGCACGGCTACTTTGTCTTCGGCCCCACCGGCT
+GCAACATCGAAGGGTTCTTCGCCACCCTCGGCGGCGAGATTGCCCTCTGGTGCCTCGTTGTCCTGGCCAT
+TGAAAGGTGGATGGTCGTCTGCAAGCCAGTGACCAATTTCCGCTTCGGTGAGAGCCATGCCATCATGGGT
+GTCATGGTGACCTGGACCATGGCATTGGCCTGTGCCCTCCCCCCTCTCTTCGGCTGGTCTCGGTACATTC
+CGGAAGGTCTGCAGTGCTCGTGCGGGATCGACTACTATACCCGGGCGCCTGGGATCAACAATGAGTCCTT
+TGTGATCTACATGTTTACCTGCCACTTCTCCATCCCACTCGCCGTCATCTCTTTCTGCTACGGCCGACTG
+GTGTGCACCGTCAAAGAGGCCGCTGCCCAGCAACAGGAGTCCGAGACCACCCAGAGGGCTGAGCGGGAGG
+TCACCCGCATGGTCGTCATCATGGTCATCTCCTTCCTGGTCTGCTGGGTGCCCTATGCCAGTGTGGCCTG
+GTACATCTTTACCCACCAGGGAAGCACTTTTGGGCCCATCTTCATGACCATTCCATCCTTCTTTGCCAAG
+AGTTCAGCCCTCTACAACCCCATGATCTACATCTGCATGAACAAGCAGTTCCGCCATTGCATGATCACCA
+CCCTCTGCTGTGGGAAGAACCCCTTCGAGGAGGAGGATGGAGCGTCCGCCACTAGCTCTAAAACTGAGGC
+TTCATCCGTGTCCTCCAGCTCTGTCTCCCCGGCATAAACCTTGTTTGACCGAACACCACGCATCAACACA
+AAGACCAAGAATGCTGACTAAATGCTAACATTTCAGGGAAATCCAAAGACTTTTTACTATTTTTTTACAC
+AACCATATAGGTTGCAAACAGAGGTTTAGCCCTGTTTACAGGTTGTCATCAATGTGATGTCAGTATGTAC
+AATATAGTCAACTTGATAGCAAGTTGTTGGCTTATTTCAGATTGTATGGGCAATGTAATCAACCATATGT
+GAAATAAATTGCAA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rhodopsin_proteins.fasta	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,43 @@
+>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]
+MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRT
+PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC
+KPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVV
+HFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQG
+SNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA
+
+>gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin
+MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRT
+PLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVC
+KPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVV
+HFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQG
+SEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTEASSVSSSQ
+VSPA
+
+>gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis]
+VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA
+VADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGE
+NHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF
+FCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTL
+PAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS
+
+>gi|283855823|gb|ADB45229.1| rhodopsin [Myotis pilosus]
+VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA
+VANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGE
+NHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF
+FCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTI
+PAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS
+
+>gi|223523|prf||0811197A rhodopsin [Bos taurus]
+MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRT
+PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC
+KPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYTPHEETNNESFVIYMFVVH
+FIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGS
+DFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA
+
+>gi|12583665|dbj|BAB21486.1| fresh water form rod opsin [Conger myriaster]
+MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRT
+PLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVC
+KPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTC
+HFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQG
+STFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSS
+VSPA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/three_human_mRNA.fasta	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,183 @@
+>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds.
+GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAG
+CCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCAC
+GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC
+CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA
+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC
+TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT
+TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA
+CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA
+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA
+CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC
+TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA
+TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT
+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG
+GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA
+TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA
+ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG
+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA
+CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC
+AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT
+CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA
+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC
+CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA
+TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG
+CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC
+TATGTGTATTTTTATTTTGAATAAACAGAAAGAAATTTTGGGTTTTTAATTTTTTTCTCC
+CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTAAAAAAAAAAAAACCTGCTA
+GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT
+ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA
+GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT
+CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT
+GGGAGGGGAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA
+TTGCTTATGAAAATTCCATAGTGGTATTTTTTTGGATTCTTAATGTGTAACTTAAACATA
+CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA
+AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG
+GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG
+AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT
+TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT
+TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACTT
+TTTTTCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG
+CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA
+CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT
+TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC
+TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT
+GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT
+AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT
+TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT
+GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT
+TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA
+GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA
+TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA
+TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT
+AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT
+TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT
+ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT
+GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA
+GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT
+GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT
+TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT
+TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT
+AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG
+AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC
+TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGTTGTT
+CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTAAA
+GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT
+TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT
+GAGAGGGAATAATCTGAGCAAAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCT
+TTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGG
+GCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA
+CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCA
+CGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTA
+GAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGA
+GGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT
+GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATG
+GAAACTTTTAGATGACATTCTACAAATTATTTTTTTCTTTAAATTAAAAGAACCTAGCCA
+ATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCA
+AAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA
+ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAA
+GGTTGTCAAGAAGGCTTTTTTTTTTTTCTTTTTTAAACCTGAGGGCAAAAAGGAATGGAT
+AAAGTAGTGTAATGGATTGACAATCAGGAAGAACAGAATAACTCAGTTTTTTTTTCTCCT
+ACAAGGAGATATGGCTGGACCAAAATAAAATGACATGAAATTGCAAAAATGAAAAT
+>ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds.
+GGGGGGCTGCGCGGCCGGGTCGGTGCGCACACGAGAAGGACGCGCGGCCCCCAGCGCTCT
+TGGGGGCCGCCTCGGAGCATGACCCCCGCGGGCCAGCGCCGCGCGCCTGATCCGAGGAGA
+CCCCGCGCTCCCGCAGCCATGGGCACCGGGGGCCGGCGGGGGGCGGCGGCCGCGCCGCTG
+CTGGTGGCGGTGGCCGCGCTGCTACTGGGCGCCGCGGGCCACCTGTACCCCGGAGAGGTG
+TGTCCCGGCATGGATATCCGGAACAACCTCACTAGGTTGCATGAGCTGGAGAATTGCTCT
+GTCATCGAAGGACACTTGCAGATACTCTTGATGTTCAAAACGAGGCCCGAAGATTTCCGA
+GACCTCAGTTTCCCCAAACTCATCATGATCACTGATTACTTGCTGCTCTTCCGGGTCTAT
+GGGCTCGAGAGCCTGAAGGACCTGTTCCCCAACCTCACGGTCATCCGGGGATCACGACTG
+TTCTTTAACTACGCGCTGGTCATCTTCGAGATGGTTCACCTCAAGGAACTCGGCCTCTAC
+AACCTGATGAACATCACCCGGGGTTCTGTCCGCATCGAGAAGAACAATGAGCTCTGTTAC
+TTGGCCACTATCGACTGGTCCCGTATCCTGGATTCCGTGGAGGATAATCACATCGTGTTG
+AACAAAGATGACAACGAGGAGTGTGGAGACATCTGTCCGGGTACCGCGAAGGGCAAGACC
+AACTGCCCCGCCACCGTCATCAACGGGCAGTTTGTCGAACGATGTTGGACTCATAGTCAC
+TGCCAGAAAGTTTGCCCGACCATCTGTAAGTCACACGGCTGCACCGCCGAAGGCCTCTGT
+TGCCACAGCGAGTGCCTGGGCAACTGTTCTCAGCCCGACGACCCCACCAAGTGCGTGGCC
+TGCCGCAACTTCTACCTGGACGGCAGGTGTGTGGAGACCTGCCCGCCCCCGTACTACCAC
+TTCCAGGACTGGCGCTGTGTGAACTTCAGCTTCTGCCAGGACCTGCACCACAAATGCAAG
+AACTCGCGGAGGCAGGGCTGCCACCAATACGTCATTCACAACAACAAGTGCATCCCTGAG
+TGTCCCTCCGGGTACACGATGAATTCCAGCAACTTGCTGTGCACCCCATGCCTGGGTCCC
+TGTCCCAAGGTGTGCCACCTCCTAGAAGGCGAGAAGACCATCGACTCGGTGACGTCTGCC
+CAGGAGCTCCGAGGATGCACCGTCATCAACGGGAGTCTGATCATCAACATTCGAGGAGGC
+AACAATCTGGCAGCTGAGCTAGAAGCCAACCTCGGCCTCATTGAAGAAATTTCAGGGTAT
+CTAAAAATCCGCCGATCCTACGCTCTGGTGTCACTTTCCTTCTTCCGGAAGTTACGTCTG
+ATTCGAGGAGAGACCTTGGAAATTGGGAACTACTCCTTCTATGCCTTGGACAACCAGAAC
+CTAAGGCAGCTCTGGGACTGGAGCAAACACAACCTCACCACCACTCAGGGGAAACTCTTC
+TTCCACTATAACCCCAAACTCTGCTTGTCAGAAATCCACAAGATGGAAGAAGTTTCAGGA
+ACCAAGGGGCGCCAGGAGAGAAACGACATTGCCCTGAAGACCAATGGGGACAAGGCATCC
+TGTGAAAATGAGTTACTTAAATTTTCTTACATTCGGACATCTTTTGACAAGATCTTGCTG
+AGATGGGAGCCGTACTGGCCCCCCGACTTCCGAGACCTCTTGGGGTTCATGCTGTTCTAC
+AAAGAGGCCCCTTATCAGAATGTGACGGAGTTCGATGGGCAGGATGCGTGTGGTTCCAAC
+AGTTGGACGGTGGTAGACATTGACCCACCCCTGAGGTCCAACGACCCCAAATCACAGAAC
+CACCCAGGGTGGCTGATGCGGGGTCTCAAGCCCTGGACCCAGTATGCCATCTTTGTGAAG
+ACCCTGGTCACCTTTTCGGATGAACGCCGGACCTATGGGGCCAAGAGTGACATCATTTAT
+GTCCAGACAGATGCCACCAACCCCTCTGTGCCCCTGGATCCAATCTCAGTGTCTAACTCA
+TCATCCCAGATTATTCTGAAGTGGAAACCACCCTCCGACCCCAATGGCAACATCACCCAC
+TACCTGGTTTTCTGGGAGAGGCAGGCGGAAGACAGTGAGCTGTTCGAGCTGGATTATTGC
+CTCAAAGGGCTGAAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT
+CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG
+ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT
+TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC
+CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG
+CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG
+CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC
+TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC
+AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT
+GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG
+AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG
+GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT
+GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC
+TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT
+GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT
+ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT
+TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC
+GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC
+GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG
+GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG
+GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC
+AAGGGCCAGCCCACGCTGGTGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC
+CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA
+GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG
+TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA
+ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC
+AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC
+ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA
+CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT
+CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA
+TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAGGACGACCTG
+CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT
+GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC
+TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC
+TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG
+ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT
+CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT
+ACCATGTCCAGTGGAGTTCAGAGATCGTTCCTATACATTTCTGTTCATCTTAAGGTGGAC
+TCGTTTGGTTACCAATTTAACTAGTCCTGCAGAGGATTTAACTGTGAACCTGGAGGGCAA
+GGGGTTTCCACAGTTGCTGCTCCTTTGGGGCAACGACGGTTTCAAACCAGGATTTTGTGT
+TTTTTCGTTCCCCCCACCCGCCCCCAGCAGATGGAAAGAAAGCACCTGTTTTTACAAATT
+CTTTTTTTTTTTTTTTTTTTTTTTTTTTTGCTGGTGTCTGAGCTTCAGTATAAAAGACAA
+AACTTCCTGTTTGTGGAACAAAATTTCGAAAGAAAAAACCAAA
+>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds.
+CCAGCTGGAGCCCTGAGTGGCTGAGCTCAGGCCTTCGCAGCATTCTTGGGTGGGAGCAGC
+CACGGGTCAGCCACAAGGGCCACAGCCATGAATGGCACAGAAGGCCCTAACTTCTACGTG
+CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG
+GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC
+TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT
+CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC
+ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT
+TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG
+GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC
+CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA
+CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC
+TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC
+TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG
+GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC
+CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG
+GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA
+GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG
+CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT
+GAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG
+CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA
+GCCATCCCACCAG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blast_rbh/README.rst	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,111 @@
+Galaxy tool to find BLAST Reciprocal Best Hits (RBH)
+====================================================
+
+This tool is copyright 2011-2014 by Peter Cock, The James Hutton Institute
+(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
+See the licence text below.
+
+This tool is a short Python script to run reciprocal BLAST searches on a
+pair of sequence files, and extract the reciprocal best hits.
+
+This is a work in progress, and builds on an earlier implementation which
+required the two BLAST searches be prepared in advance. Integration allows
+a much simpler user experience, and can ensure sensible filters are used.
+
+
+Automated Installation
+======================
+
+Installation via the Galaxy Tool Shed should take care of the Galaxy side of
+things, including the dependency on the NCBI BLAST+ binaries.
+
+
+Manual Installation
+===================
+
+There are just two files to install:
+
+- ``blast_rbh.py`` (the Python script)
+- ``blast_rbh.xml`` (the Galaxy tool definition)
+
+The suggested location is in a ``tools/blast_rbh/`` folder. You will then
+need to modify the ``tool_conf.xml`` file to tell Galaxy to offer the tool
+by adding the line::
+
+    <tool file="blast_rbh/blast_rbh.xml" />
+
+If you want to run the functional tests, include the same line in your
+``tool_conf.xml.sample`` file, and the sample test files under Galaxy's
+``test-data/`` directory. Then::
+
+    ./run_functional_tests.sh -id blast_reciprocal_best_hits
+
+You will need to have the NCBI BLAST+ binaries installed and on the ``$PATH``.
+
+
+History
+=======
+
+======= ======================================================================
+Version Changes
+------- ----------------------------------------------------------------------
+v0.1.0  - Initial Test Tool Shed release, targeting NCBI BLAST+ 2.2.29
+v0.1.1  - Supports self-comparison, sometimes useful for spotting duplicates.
+v0.1.2  - Using optparse for command line API.
+        - Fixed Tool Shed dependency definition.
+======= ======================================================================
+
+
+Developers
+==========
+
+This tool is developed on the following GitHub repository:
+https://github.com/peterjc/galaxy_blast/tree/master/tools/blast_rbh
+
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use
+the following command from the Galaxy root folder::
+
+    $ tar -czf blast_rbh.tar.gz tools/blast_rbh/README.rst tools/blast_rbh/blast_rbh.xml tools/blast_rbh/blast_rbh.py tools/blast_rbh/tool_dependencies.xml test-data/rhodopsin_nucs.fasta test-data/rhodopsin_proteins.fasta test-data/three_human_mRNA.fasta test-data/four_human_proteins.fasta test-data/k12_edited_proteins.fasta test-data/k12_ten_proteins.fasta test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular test-data/rbh_none.tabular test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular test-data/rbh_blastp_k12.tabular test-data/rbh_blastp_k12_self.tabular
+
+Check this worked::
+
+    $ tar -tzf blast_rbh.tar.gz
+    tools/blast_rbh/README.rst
+    tools/blast_rbh/blast_rbh.xml
+    tools/blast_rbh/blast_rbh.py
+    tools/blast_rbh/tool_dependencies.xml
+    test-data/rhodopsin_nucs.fasta
+    test-data/rhodopsin_proteins.fasta
+    test-data/three_human_mRNA.fasta
+    test-data/four_human_proteins.fasta
+    test-data/k12_edited_proteins.fasta
+    test-data/k12_ten_proteins.fasta
+    test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular
+    test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular
+    test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular
+    test-data/rbh_none.tabular
+    test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular
+    test-data/rbh_blastp_k12.tabular
+    test-data/rbh_blastp_k12_self.tabular
+
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blast_rbh/blast_rbh.py	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,254 @@
+#!/usr/bin/env python
+"""BLAST Reciprocal Best Hit (RBH) from two FASTA input files.
+
+Takes two positional arguments (the FASTA filenames for species A and B)
+plus the following command line options:
+
+-a / --alphabet  Sequence type (nucl or prot)
+-t / --task      BLAST type (e.g. blastn, or blastp) consistent with sequence type
+-i / --identity  Minimum BLAST percentage identity (default 0)
+-c / --coverage  Minimum BLAST query coverage (default 0)
+-o / --output    Output filename
+"""
+
+# TODO - Output more columns, e.g. pident, qcovs, descriptions?
+
+import os
+import sys
+import tempfile
+import shutil
+from optparse import OptionParser
+
+def stop_err( msg ):
+    sys.stderr.write("%s\n" % msg)
+    sys.exit(1)
+
+def run(cmd):
+    return_code = os.system(cmd)
+    if return_code:
+        stop_err("Error %i from: %s" % (return_code, cmd))
+
+if "--version" in sys.argv[1:]:
+    #TODO - Capture version of BLAST+ binaries too?
+    print "BLAST RBH v0.1.2"
+    sys.exit(0)
+
+#Parse Command Line
+usage = """Use as follows:
+
+$ python blast_rbh.py [options] A.fasta B.fasta
+"""
+
+parser = OptionParser(usage=usage)
+parser.add_option("-a", "--alphabet", dest="dbtype",
+                  default=None,
+                  help="Alphabet type (nucl or prot)")
+parser.add_option("-t", "--task", dest="task",
+                  default=None,
+                  help="BLAST task (e.g. blastp, blastn, megablast)")
+parser.add_option("-i","--identity", dest="min_identity",
+                  default="0",
+                  help="Minimum percentage identity (optional, default 0)")
+parser.add_option("-c", "--coverage", dest="min_coverage",
+                  default="0",
+                  help="Minimum HSP coverage (optional, default 0)")
+parser.add_option("-o", "--output", dest="output",
+                  default=None, metavar="FILE",
+                  help="Output filename")
+options, args = parser.parse_args()
+
+if len(args) != 2:
+    stop_err("Expects two input FASTA filenames")
+fasta_a, fasta_b = args
+if not os.path.isfile(fasta_a):
+    stop_err("Missing input file for species A: %r" % fasta_a)
+if not os.path.isfile(fasta_b):
+    stop_err("Missing input file for species B: %r" % fasta_b)
+if os.path.abspath(fasta_a) == os.path.abspath(fasta_b):
+    self_comparison = True
+    print("Doing self comparison; ignoring self matches.")
+else:
+    self_comparison = False
+
+if not options.output:
+    stop_err("Output filename required, e.g. -o example.tab")
+out_file = options.output
+
+try:
+    min_identity = float(options.min_identity)
+except ValueError:
+    stop_err("Expected number between 0 and 100 for minimum identity, not %r" % min_identity)
+if not (0 <= min_identity <= 100):
+    stop_err("Expected minimum identity between 0 and 100, not %0.2f" % min_identity)
+try:
+    min_coverage = float(options.min_coverage)
+except ValueError:
+    stop_err("Expected number between 0 and 100 for minimum coverage, not %r" % min_coverage)
+if not (0 <= min_coverage <= 100):
+    stop_err("Expected minimum coverage between 0 and 100, not %0.2f" % min_coverage)
+
+if not options.task:
+    stop_err("Missing BLAST task, e.g. -t blastp")
+blast_type = options.task
+
+if not options.dbtype:
+    stop_err("Missing database type, -a nucl, or -a prot")
+dbtype = options.dbtype
+if dbtype == "nucl":
+    if blast_type in ["megablast", "blastn", "blastn-short", "dc-megablast"]:
+         blast_cmd = "blastn -task %s" % blast_type
+    elif blast_type == "tblastx":
+        blast_cmd = "tblastx"
+    else:
+        stop_err("Invalid BLAST type for BLASTN: %r" % blast_type)
+elif dbtype == "prot":
+    if blast_type not in ["blastp", "blastp-short"]:
+        stop_err("Invalid BLAST type for BLASTP: %r" % blast_type)
+    blast_cmd = "blastp -task %s" % blast_type
+else:
+    stop_err("Expected 'nucl' or 'prot' for BLAST database type, not %r" % blast_type)
+
+try:
+    threads = int(os.environ.get("GALAXY_SLOTS", "1"))
+except:
+    threads = 1
+assert 1 <= threads, threads
+
+makeblastdb_exe = "makeblastdb"
+
+base_path = tempfile.mkdtemp()
+db_a = os.path.join(base_path, "SpeciesA")
+db_b = os.path.join(base_path, "SpeciesB")
+a_vs_b = os.path.join(base_path, "A_vs_B.tabular")
+b_vs_a = os.path.join(base_path, "B_vs_A.tabular")
+log = os.path.join(base_path, "blast.log")
+
+cols = "qseqid sseqid bitscore pident qcovhsp qlen length" #Or qcovs?
+c_query = 0
+c_match = 1
+c_score = 2
+c_identity = 3
+c_coverage = 4
+c_qlen = 5
+c_length = 6
+
+tie_warning = 0
+
+def best_hits(blast_tabular, ignore_self=False):
+    """Iterate over BLAST tabular output, returns best hits as 2-tuples.
+
+    This is a generator. The file is expected to use the column order
+    given by the module level c_* indexes, and hits are filtered with the
+    module level min_identity and min_coverage thresholds.
+
+    Each return value is (query name, tuple of value for the best hit),
+    where the tuple holds (match name, score as float, score string,
+    identity string, coverage string, query length, alignment length).
+
+    If ignore_self is true, hits where the query and match names are
+    identical are skipped.
+
+    A change of query name is what ends the previous query, so all the
+    hits for a query are treated as being on consecutive lines.
+
+    Tied best hits to different sequences are NOT returned.
+    Instead the global tie_warning counter is incremented.
+
+    One hit is returned for tied best hits to the same sequence
+    (e.g. repeated domains).
+    """
+    global tie_warning
+    #Query currently being processed, its best score so far, and a dict
+    #of the hit(s) sharing that best score keyed by match name
+    current = None
+    best_score = None
+    best = None
+    with open(blast_tabular) as h:
+        for line in h:
+            if line.startswith("#"):
+                continue
+            parts = line.rstrip("\n").split("\t")
+            #Hits below either threshold are ignored completely
+            if float(parts[c_identity]) < min_identity or float(parts[c_coverage]) < min_coverage:
+                continue
+            a = parts[c_query]
+            b = parts[c_match]
+            if ignore_self and a == b:
+                continue
+            score = float(parts[c_score])
+            qlen = int(parts[c_qlen])
+            length = int(parts[c_length])
+            #print("Considering hit for %s to %s with score %s..." % (a, b, score))
+            #Every branch below except the 'continue' falls through to the
+            #shared code at the end of the loop body which records this hit
+            if current is None:
+                #First hit
+                assert best is None
+                assert best_score is None
+                best = dict()
+                #Now append this hit...
+            elif a != current:
+                #New hit
+                #i.e. a new query, so report the previous query before starting afresh
+                if len(best) == 1:
+                    #Unambiguous (no tied matches)
+                    yield current, list(best.values())[0]
+                else:
+                    #print("%s has %i equally good hits: %s" % (a, len(best), ", ".join(best)))
+                    tie_warning += 1
+                best = dict()
+                #Now append this hit...
+            elif score < best_score:
+                #print("No improvement for %s, %s < %s" % (a, score, best_score))
+                continue
+            elif score > best_score:
+                #This is better, discard old best
+                best = dict()
+                #Now append this hit...
+            else:
+                #print("Tied best hits for %s" % a)
+                assert best_score == score
+                #Now append this hit...
+            current = a
+            best_score = score
+            #This will collapse two equally good hits to the same target (e.g. duplicated domain)
+            best[b] = (b, score, parts[c_score], parts[c_identity], parts[c_coverage], qlen, length)
+    #Best hit for final query, if unambiguous:
+    if current is not None:
+        if len(best)==1:
+            yield current, list(best.values())[0]
+        else:
+            #print("%s has %i equally good hits: %s" % (a, len(best), ", ".join(best)))
+            tie_warning += 1
+
+
+#print("Starting...")
+#TODO - Report log in case of error?
+run('%s -dbtype %s -in "%s" -out "%s" -logfile "%s"' % (makeblastdb_exe, dbtype, fasta_a, db_a, log))
+run('%s -dbtype %s -in "%s" -out "%s" -logfile "%s"' % (makeblastdb_exe, dbtype, fasta_b, db_b, log))
+#print("BLAST databases prepared.")
+run('%s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i'
+    % (blast_cmd, fasta_a, db_b, a_vs_b, cols, threads))
+#print("BLAST species A vs species B done.")
+run('%s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i'
+    % (blast_cmd, fasta_b, db_a, b_vs_a, cols, threads))
+#print("BLAST species B vs species A done.")
+
+
+best_b_vs_a = dict(best_hits(b_vs_a, self_comparison))
+
+
+count = 0
+outfile = open(out_file, 'w')
+outfile.write("#A_id\tB_id\tA_length\tB_length\tA_qcovhsp\tB_qcovhsp\tlength\tpident\tbitscore\n")
+for a, (b, a_score_float, a_score_str, a_identity_str, a_coverage_str, a_qlen, a_length) in best_hits(a_vs_b, self_comparison):
+    if b not in best_b_vs_a:
+        #Match b has no best hit
+        continue
+    a2, b_score_float, b_score_str, b_identity_str, b_coverage_str, b_qlen, b_length = best_b_vs_a[b]
+    if a != a2:
+        #Not an RBH
+        continue
+    #Start with IDs, lengths, coverage
+    values = [a, b, a_qlen, b_qlen, a_coverage_str, b_coverage_str]
+    #Alignment length was an integer so don't care about original string
+    values.append(min(a_length, b_length))
+    #Output the original string versions of the scores
+    if float(a_identity_str) < float(b_identity_str):
+        values.append(a_identity_str)
+    else:
+        values.append(b_identity_str)
+    if a_score_float < b_score_float:
+        values.append(a_score_str)
+    else:
+        values.append(b_score_str)
+    outfile.write("%s\t%s\t%i\t%i\t%s\t%s\t%i\t%s\t%s\n" % tuple(values))
+    count += 1
+outfile.close()
+print "Done, %i RBH found" % count
+if tie_warning:
+    sys.stderr.write("Warning: Sequencies with tied best hits found, you may have duplicates/clusters\n")
+
+#Remove temp files...
+shutil.rmtree(base_path)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blast_rbh/blast_rbh.xml	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,239 @@
+<tool id="blast_reciprocal_best_hits" name="BLAST Reciprocal Best Hits (RBH)" version="0.1.2">
+    <description>from two FASTA files</description>
+    <requirements>
+            <requirement type="binary">makeblastdb</requirement>
+            <requirement type="binary">blastp</requirement>
+            <requirement type="binary">blastn</requirement>
+            <requirement type="package" version="2.2.29">blast+</requirement>
+    </requirements>
+    <version_command interpreter="python">
+blast_rbh.py --version
+    </version_command>
+    <command interpreter="python">
+blast_rbh.py "$fasta_a" "$fasta_b"
+-a $seq.dbtype
+#if $seq.dbtype=="nucl"
+-t $seq.nucl_type
+#else
+-t $seq.prot_type
+#end if
+-i $identity
+-c $q_cover
+-o "$output"
+    </command>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
+    <inputs>
+        <!-- Galaxy does not have sub-types for protein vs nucleotide FASTA -->
+        <param name="fasta_a" type="data" format="fasta"
+               label="Genes/proteins from species A"
+               help="FASTA file, one sequence per gene/protein." />
+        <param name="fasta_b" type="data" format="fasta"
+               label="Genes/proteins from species B"
+               help="FASTA file, one sequence per gene/protein." />
+        <conditional name="seq">
+            <param name="dbtype" type="select" label="Molecule type of FASTA inputs">
+                <option value="prot">protein</option>
+                <option value="nucl">nucleotide</option>
+            </param>
+            <when value="prot">
+                <param name="prot_type" type="select" display="radio" label="Type of BLAST">
+                    <option value="blastp">blastp - Traditional BLASTP to compare a protein query to a protein database</option>
+                    <option value="blastp-short">blastp-short - BLASTP optimized for queries shorter than 30 residues</option>
+                </param>
+            </when>
+            <when value="nucl">
+                <param name="nucl_type" type="select" display="radio" label="Type of BLAST">
+                    <option value="megablast">megablast - Traditional megablast used to find very similar (e.g., intraspecies or closely related species) sequences</option>
+                    <option value="blastn">blastn - Traditional BLASTN requiring an exact match of 11, for somewhat similar sequences</option>
+                    <option value="blastn-short">blastn-short - BLASTN program optimized for sequences shorter than 50 bases</option>
+                    <option value="dc-megablast">dc-megablast - Discontiguous megablast used to find more distant (e.g., interspecies) sequences</option>
+                    <option value="tblastx">tblastx - TBLASTX program using translated query against translated database (protein level matches)</option>
+                </param>
+            </when>
+        </conditional>
+        <param name="identity" type="float" value="70" min="0" max="100"
+               label="Minimum percentage identity for BLAST matches"
+               help="Default is 70%, use 0 for no filtering." />
+        <param name="q_cover" type="float" value="50" min="0" max="100"
+               label="Minimum percentage query coverage for BLAST matches"
+               help="Default is 50%, use 0 for no filtering." />
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular" label="BLAST RBH: $fasta_a.name vs $fasta_b.name" />
+    </outputs>
+    <requirements>
+    </requirements>
+    <tests>
+        <test>
+            <param name="fasta_a" value="four_human_proteins.fasta" ftype="fasta"/>
+            <param name="fasta_b" value="rhodopsin_proteins.fasta" ftype="fasta"/>
+            <param name="dbtype" value="prot"/>
+            <param name="prot_type" value="blastp"/>
+            <param name="identity" value="0.0"/>
+            <param name="q_cover" value="0.0"/>
+            <output name="output" file="rbh_blastp_four_human_vs_rhodopsin_proteins.tabular" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="fasta_a" value="rhodopsin_nucs.fasta" ftype="fasta"/>
+            <param name="fasta_b" value="three_human_mRNA.fasta" ftype="fasta"/>
+            <param name="dbtype" value="nucl"/>
+            <param name="nucl_type" value="megablast"/>
+            <param name="identity" value="0.0"/>
+            <param name="q_cover" value="0.0"/>
+            <output name="output" file="rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="fasta_a" value="rhodopsin_nucs.fasta" ftype="fasta"/>
+            <param name="fasta_b" value="three_human_mRNA.fasta" ftype="fasta"/>
+            <param name="dbtype" value="nucl"/>
+            <param name="nucl_type" value="megablast"/>
+            <param name="identity" value="92"/>
+            <param name="q_cover" value="86"/>
+            <output name="output" file="rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular" ftype="tabular"/>
+        </test>
+        <!-- push the percentage identity over the 92.07% level -->
+        <test>
+            <param name="fasta_a" value="rhodopsin_nucs.fasta" ftype="fasta"/>
+            <param name="fasta_b" value="three_human_mRNA.fasta" ftype="fasta"/>
+            <param name="dbtype" value="nucl"/>
+            <param name="nucl_type" value="megablast"/>
+            <param name="identity" value="92.5"/>
+            <param name="q_cover" value="86"/>
+            <output name="output" file="rbh_none.tabular" ftype="tabular"/>
+        </test>
+        <!-- push the coverage over the 86% level -->
+        <test>
+            <param name="fasta_a" value="rhodopsin_nucs.fasta" ftype="fasta"/>
+            <param name="fasta_b" value="three_human_mRNA.fasta" ftype="fasta"/>
+            <param name="dbtype" value="nucl"/>
+            <param name="nucl_type" value="megablast"/>
+            <param name="identity" value="92"/>
+            <param name="q_cover" value="87"/>
+            <output name="output" file="rbh_none.tabular" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="fasta_a" value="rhodopsin_nucs.fasta" ftype="fasta"/>
+            <param name="fasta_b" value="three_human_mRNA.fasta" ftype="fasta"/>
+            <param name="dbtype" value="nucl"/>
+            <param name="nucl_type" value="tblastx"/>
+            <param name="identity" value="0.0"/>
+            <param name="q_cover" value="0.0"/>
+            <output name="output" file="rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="fasta_a" value="three_human_mRNA.fasta" ftype="fasta"/>
+            <param name="fasta_b" value="rhodopsin_nucs.fasta" ftype="fasta"/>
+            <param name="dbtype" value="nucl"/>
+            <param name="nucl_type" value="blastn"/>
+            <param name="identity" value="0.0"/>
+            <param name="q_cover" value="0.0"/>
+            <output name="output" file="rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular" ftype="tabular"/>
+        </test>
+        <!-- this pair of examples test tied best hits -->
+        <test>
+            <param name="fasta_a" value="k12_ten_proteins.fasta" ftype="fasta"/>
+            <param name="fasta_b" value="k12_edited_proteins.fasta" ftype="fasta"/>
+            <param name="dbtype" value="prot"/>
+            <param name="prot_type" value="blastp"/>
+            <param name="identity" value="0.0"/>
+            <param name="q_cover" value="0.0"/>
+            <output name="output" file="rbh_blastp_k12.tabular" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="fasta_a" value="k12_edited_proteins.fasta" ftype="fasta"/>
+            <param name="fasta_b" value="k12_ten_proteins.fasta" ftype="fasta"/>
+            <param name="dbtype" value="prot"/>
+            <param name="prot_type" value="blastp"/>
+            <param name="identity" value="0.0"/>
+            <param name="q_cover" value="0.0"/>
+            <output name="output" file="rbh_blastp_k12.tabular" ftype="tabular"/>
+        </test>
+        <!-- this tests self-comparison -->
+        <test>
+            <param name="fasta_a" value="k12_edited_proteins.fasta" ftype="fasta"/>
+            <param name="fasta_b" value="k12_edited_proteins.fasta" ftype="fasta"/>
+            <param name="dbtype" value="prot"/>
+            <param name="prot_type" value="blastp"/>
+            <param name="identity" value="80.0"/>
+            <param name="q_cover" value="80.0"/>
+            <output name="output" file="rbh_blastp_k12_self.tabular" ftype="tabular"/>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Takes two FASTA files (*species A* and *species B*), builds a BLAST database
+for each, runs reciprocal BLAST searches (*A vs B*, and *B vs A*), optionally
+filters the HSPs, and then compiles a list of the reciprocal best hits (RBH).
+
+The output from this tool is a tabular file containing multiple columns, with
+information about the BLAST matches used:
+
+====== ==================================
+Column Description
+------ ----------------------------------
+     1 ID from *species A*
+     2 ID from *species B*
+     3 Length of sequence *A*
+     4 Length of sequence *B*
+     5 Percentage of sequence *A* covered
+     6 Percentage of sequence *B* covered
+     7 HSP alignment length
+     8 HSP percentage identity
+     9 HSP bitscore
+====== ==================================
+
+These values correspond to the ``qseqid``/``sseqid``, ``qlen``/``slen``,
+``qcovhsp``, ``length``, ``pident`` and ``bitscore`` values in the BLAST+
+tabular output.
+
+For the alignment length, bitscore and percentage identity the values for
+*A vs B* and *B vs A* are typically the same, so their minimum is shown.
+The coverage values are given by the HSP alignment length divided by the
+sequence length (adjusted by a factor of three for TBLASTX).
+
+Note that if a sequence has equally scoring top BLAST matches to multiple
+sequences in the other file, it will not be considered for an RBH. This
+can happen following gene duplication, or for (near) identical gene
+duplicates.
+
+.. class:: warningmark
+
+**Note**
+
+If you are trying to use BLAST RBH matches to identify candidate orthologues
+or transfer annotation, you *must* use a percentage identity and minimum
+coverage threshold or similar. See:
+
+Punta and Ofran (2008) The Rough Guide to In Silico Function Prediction,
+or How To Use Sequence and Structure Information To Predict Protein
+Function. PLoS Comput Biol 4(10): e1000160.
+http://dx.doi.org/10.1371/journal.pcbi.1000160
+
+The defaults are to require 70% sequence identity over the aligned region
+(using ``pident`` in the BLAST+ tabular output), and that the HSP alignment
+covers at least 50% of the query sequence (using ``qcovhsp`` in the BLAST+
+tabular output).
+
+
+**References**
+
+A specific paper covering this tool is planned, but please also cite:
+
+Christiam Camacho et al. (2009).
+BLAST+: architecture and applications.
+BMC Bioinformatics. 15;10:421.
+http://dx.doi.org/10.1186/1471-2105-10-421
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/blast_rbh
+    </help>
+    <citations>
+        <citation type="doi">10.1186/1471-2105-10-421</citation>
+        <!-- TODO: Add BibTeX entry / preprint DOI for Galaxy BLAST+ paper -->
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blast_rbh/tool_dependencies.xml	Mon Aug 04 08:13:39 2014 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="blast+" version="2.2.29">
+        <repository changeset_revision="a2ec897aac2c" name="package_blast_plus_2_2_29" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>