Mercurial > repos > peterjc > blast_rbh
changeset 4:d8d9a9069586 draft
v0.1.11 using BLAST+ 2.5.0 and Biopython 1.67
author | peterjc |
---|---|
date | Wed, 19 Apr 2017 07:44:47 -0400 |
parents | 9ba8ebb636f4 |
children | 8f4500f6f2aa |
files | test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular test-data/rbh_blastp_k12.tabular test-data/rbh_blastp_k12_self.tabular test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular test-data/rhodopsin_nucs.fasta tools/blast_rbh/README.rst tools/blast_rbh/blast_rbh.py tools/blast_rbh/blast_rbh.xml tools/blast_rbh/tool_dependencies.xml |
diffstat | 11 files changed, 273 insertions(+), 260 deletions(-) [+] |
line wrap: on
line diff
--- a/test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular Mon Sep 07 04:40:51 2015 -0400 +++ b/test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular Wed Apr 19 07:44:47 2017 -0400 @@ -1,2 +1,2 @@ #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore -ENA|BC112106|BC112106.1 gi|57163782|ref|NM_001009242.1| 1213 1047 86 100 1047 92.07 1514 +ENA|BC112106|BC112106.1 gi|57163782|ref|NM_001009242.1| 1213 1047 86 100 1047 92.073 1514
--- a/test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular Mon Sep 07 04:40:51 2015 -0400 +++ b/test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular Wed Apr 19 07:44:47 2017 -0400 @@ -1,2 +1,2 @@ #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore -sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 348 348 100 100 348 96.55 701 +sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 348 348 100 100 348 96.552 701
--- a/test-data/rbh_blastp_k12.tabular Mon Sep 07 04:40:51 2015 -0400 +++ b/test-data/rbh_blastp_k12.tabular Wed Apr 19 07:44:47 2017 -0400 @@ -1,10 +1,10 @@ #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore -gi|16127995|ref|NP_414542.1| gi|16127995|ref|NP_414542.1| 21 21 100 100 21 100.00 38.1 -gi|16127996|ref|NP_414543.1| gi|16127996|ref|NP_414543.1| 820 820 100 100 820 100.00 1687 -gi|16127997|ref|NP_414544.1| gi|16127997|ref|NP_414544.1| 310 310 100 100 310 100.00 642 -gi|16127998|ref|NP_414545.1| gi|16127998|ref|NP_414545.1| 428 428 100 100 428 100.00 882 -gi|16128000|ref|NP_414547.1| gi|16128000|ref|NP_414547.1| 258 258 100 100 258 100.00 531 -gi|16128001|ref|NP_414548.1| gi|16128001|ref|NP_414548.1| 476 476 100 100 476 100.00 959 -gi|16128002|ref|NP_414549.1| gi|16128002|ref|NP_414549.1| 317 317 100 100 317 100.00 648 -gi|16128003|ref|NP_414550.1| gi|16128003|ref|NP_414550.1| 195 195 100 100 195 100.00 397 -gi|16128004|ref|NP_414551.1| gi|16128004|ref|NP_414551.1| 188 188 100 100 188 100.00 365 +gi|16127995|ref|NP_414542.1| gi|16127995|ref|NP_414542.1| 21 21 100 100 21 100.000 38.1 +gi|16127996|ref|NP_414543.1| gi|16127996|ref|NP_414543.1| 820 820 100 100 820 100.000 1687 +gi|16127997|ref|NP_414544.1| gi|16127997|ref|NP_414544.1| 310 310 100 100 310 100.000 642 +gi|16127998|ref|NP_414545.1| gi|16127998|ref|NP_414545.1| 428 428 100 100 428 100.000 882 +gi|16128000|ref|NP_414547.1| gi|16128000|ref|NP_414547.1| 258 258 100 100 258 100.000 531 +gi|16128001|ref|NP_414548.1| gi|16128001|ref|NP_414548.1| 476 476 100 100 476 100.000 959 +gi|16128002|ref|NP_414549.1| gi|16128002|ref|NP_414549.1| 317 317 100 100 317 100.000 648 +gi|16128003|ref|NP_414550.1| gi|16128003|ref|NP_414550.1| 195 195 100 100 195 100.000 397 +gi|16128004|ref|NP_414551.1| gi|16128004|ref|NP_414551.1| 188 188 100 100 188 100.000 365
--- a/test-data/rbh_blastp_k12_self.tabular Mon Sep 07 04:40:51 2015 -0400 +++ b/test-data/rbh_blastp_k12_self.tabular Wed Apr 19 07:44:47 2017 -0400 @@ -1,5 +1,5 @@ #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore -gi|16127997|ref|NP_414544.1| NP_414544_near_copy 310 309 99 100 309 99.68 638 -NP_414544_near_copy gi|16127997|ref|NP_414544.1| 309 310 100 99 309 99.68 638 -NP_414546_near_copy_1 NP_414546_near_copy_2 99 100 99 98 98 100.00 197 -NP_414546_near_copy_2 NP_414546_near_copy_1 100 99 98 99 98 100.00 197 +gi|16127997|ref|NP_414544.1| NP_414544_near_copy 310 309 99 100 309 99.676 638 +NP_414544_near_copy gi|16127997|ref|NP_414544.1| 309 310 100 99 309 99.676 638 +NP_414546_near_copy_1 NP_414546_near_copy_2 99 100 99 98 98 100.000 197 +NP_414546_near_copy_2 NP_414546_near_copy_1 100 99 98 99 98 100.000 197
--- a/test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular Mon Sep 07 04:40:51 2015 -0400 +++ b/test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular Wed Apr 19 07:44:47 2017 -0400 @@ -1,2 +1,2 @@ #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1047 1213 100 86 1047 92.07 1474 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1047 1213 100 86 1047 92.073 1474
--- a/test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular Mon Sep 07 04:40:51 2015 -0400 +++ b/test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular Wed Apr 19 07:44:47 2017 -0400 @@ -1,2 +1,2 @@ #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1047 1213 22 19 230 97.39 559 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1047 1213 22 19 230 97.391 559
--- a/test-data/rhodopsin_nucs.fasta Mon Sep 07 04:40:51 2015 -0400 +++ b/test-data/rhodopsin_nucs.fasta Wed Apr 19 07:44:47 2017 -0400 @@ -1,161 +1,138 @@ >gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA -ATGAACGGGACGGAGGGCCCGAACTTCTACGTGCCCTTCTCCAACAAAACGGGTGTGGTACGCAGCCCCT -TCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTCCTGCT -CATCGTGCTTGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACGGTCCAGCACAAGAAGCTGCGCACG -CCTCTCAACTACATCCTGCTCAACCTGGCCGTGGCTGACCTCTTCATGGTCTTCGGTGGCTTCACCACCA -CCCTCTACACCTCTCTGCATGGATACTTTGTCTTTGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGC -CACACTGGGCGGTGAAATTGCCCTGTGGTCTTTGGTGGTCCTGGCCATTGAGCGGTACGTGGTGGTGTGT -AAGCCCATGAGCAACTTCCGCTTTGGGGAGAACCATGCCATAATGGGCGTCGCTTTCACCTGGGTCATGG -CACTGGCCTGCGCTGCACCCCCCCTCGTTGGTTGGTCCAGGTACATCCCTGAAGGCATGCAGTGTTCATG -CGGGATCGACTACTACACACTCAAGCCAGAAGTCAACAACGAGTCCTTTGTCATCTACATGTTCGTGGTC -CACTTCACCATCCCCATGATCGTCATCTTCTTTTGCTACGGGCAGCTTGTCTTCACAGTCAAGGAGGCGG -CAGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCTGAGAAGGAGGTCACTCGCATGGTCATCATCAT -GGTCATTGCTTTCCTGATCTGTTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGG -TCCAACTTTGGCCCCATCTTCATGACACTCCCGGCGTTCTTCGCAAAGTCCTCCTCCATCTACAACCCTG -TCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCC -ACTGGGTGATGACGAGGCTTCCACAACCGGTTCCAAGACGGAGACCAGCCAGGTGGCACCGGCCTAA - +ATGAACGGGACGGAGGGCCCGAACTTCTACGTGCCCTTCTCCAACAAAACGGGTGTGGTACGCAGCCCCTTCGAGTACCC +ACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTCCTGCTCATCGTGCTTGGCTTCCCCA +TCAACTTCCTCACGCTCTACGTCACGGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTGGCC +GTGGCTGACCTCTTCATGGTCTTCGGTGGCTTCACCACCACCCTCTACACCTCTCTGCATGGATACTTTGTCTTTGGGCC +CACAGGATGCAATTTGGAGGGCTTCTTTGCCACACTGGGCGGTGAAATTGCCCTGTGGTCTTTGGTGGTCCTGGCCATTG +AGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTTGGGGAGAACCATGCCATAATGGGCGTCGCTTTCACC +TGGGTCATGGCACTGGCCTGCGCTGCACCCCCCCTCGTTGGTTGGTCCAGGTACATCCCTGAAGGCATGCAGTGTTCATG +CGGGATCGACTACTACACACTCAAGCCAGAAGTCAACAACGAGTCCTTTGTCATCTACATGTTCGTGGTCCACTTCACCA 
+TCCCCATGATCGTCATCTTCTTTTGCTACGGGCAGCTTGTCTTCACAGTCAAGGAGGCGGCAGCCCAGCAGCAGGAGTCA +GCCACCACCCAGAAGGCTGAGAAGGAGGTCACTCGCATGGTCATCATCATGGTCATTGCTTTCCTGATCTGTTGGGTGCC +CTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGGTCCAACTTTGGCCCCATCTTCATGACACTCCCGGCGTTCT +TCGCAAAGTCCTCCTCCATCTACAACCCTGTCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACTACC +CTCTGCTGTGGCAAGAACCCACTGGGTGATGACGAGGCTTCCACAACCGGTTCCAAGACGGAGACCAGCCAGGTGGCACC +GGCCTAA >gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds -TCTTTCTAGTTTGGGGGGGGGGACTTTAAAGAGCCGCCAATATGAACGGAACAGAAGGCCCAAACTTTTA -CATACCCATGTCCAACAAGACTGGGGTGGTGCGAAGCCCCTTTGAATACCCTCAGTATTACCTGGCAGAG -CCATGGCAATATTCCATTCTGTGCGCGTACATGTTCCTGCTCATTCTACTTGGGTTCCCAATCAACTTCA -TGACCTTGTACGTCACCATCCAGCACAAGAAGCTCCGGACACCCTTAAACTATATCCTGCTGAATTTGGC -CTTTGCCAACCACTTCATGGTCCTGTGTGGATTCACGGTGACAATGTACTCCTCAATGAACGGATACTTC -ATCCTCGGAGCCACCGGTTGCTATGTTGAAGGCTTCTTCGCTACCCTTGGTGGTGAAATCGCCCTTTGGT -CCCTGGTGGTCTTGGCCATTGAACGATACGTGGTCGTCTGTAAGCCCATGAGCAACTTCCGATTTAGTGA -GAACCATGCCGTCATGGGCGTAGCGTTCACCTGGATAATGGCTTTGTCCTGTGCTGTTCCTCCACTCCTT -GGATGGTCCAGGTACATCCCCGAGGGCATGCAGTGCTCCTGCGGAGTCGACTACTACACCCTGAAGCCCG -AGGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTCGTCCACTTCACCATCCCCCTGATTATCATTTT -CTTCTGCTATGGCCGCCTGGTGTGCACTGTGAAAGAGGCTGCAGCTCAACAGCAAGAGTCCGCCACCACC -CAGAAGGCCGAGAAAGAGGTGACCAGGATGGTGATCATCATGGTGGTCTTCTTCCTTATCTGTTGGGTCC -CCTACGCCTCTGTCGCTTTCTTCATCTTCAGCAATCAGGGCTCTGAGTTCGGCCCCATCTTCATGACCGT -CCCAGCTTTCTTTGCCAAGAGTTCTTCCATCTACAACCCCGTCATCTACATCATGCTCAACAAGCAGTTC -CGTAACTGCATGATCACCACCCTGTGCTGCGGCAAGAATCCCTTTGGAGAAGACGATGCCTCCTCTGCCG -CCACCTCCAAGACAGAGGCTTCTTCTGTTTCTTCCAGCCAGGTGTCTCCTGCATAAGACCTTCCACCAGG -CCTGTCTCAGGGTCCGCTGCCTCACACAGCTCCCACCGCCCCAACTCCGTCTCCTGCTCGCTAAGGCGGC -GAAGTTCCCCTTCCATTACATAAAACGTATCTGTTCAAGAAAGGCGACGACGAAGGAGAAGAAGAGGAGC -CCCCCCGAACCCCTTCGCTGCTGCTGAAAACGACTTGATTGCTTCTGCAACGCAACGGGGCCTTACGGCA -GCGAAGGGGTTGTCATCCGGACGCGCCAAGAATTCCTTCGAGACTGTAAATATCTTAAAGGAACCGTCCT 
-GCTAGTTACCGACGCCGCTCCTGTAGCCGCCGTTCCCCCGCACTCCGGCCGGTTCATACCTCTTATTTTT -TTGCAATGCAACAGAAAATAATATTTTTGTTCCCACGGCTTTTCCCGGTCAGGTCTGGTAGTGGCGGAGA -TTGGCCGACCCCTCGCACCTGTAATAAAGCGCAG - +TCTTTCTAGTTTGGGGGGGGGGACTTTAAAGAGCCGCCAATATGAACGGAACAGAAGGCCCAAACTTTTACATACCCATG +TCCAACAAGACTGGGGTGGTGCGAAGCCCCTTTGAATACCCTCAGTATTACCTGGCAGAGCCATGGCAATATTCCATTCT +GTGCGCGTACATGTTCCTGCTCATTCTACTTGGGTTCCCAATCAACTTCATGACCTTGTACGTCACCATCCAGCACAAGA +AGCTCCGGACACCCTTAAACTATATCCTGCTGAATTTGGCCTTTGCCAACCACTTCATGGTCCTGTGTGGATTCACGGTG +ACAATGTACTCCTCAATGAACGGATACTTCATCCTCGGAGCCACCGGTTGCTATGTTGAAGGCTTCTTCGCTACCCTTGG +TGGTGAAATCGCCCTTTGGTCCCTGGTGGTCTTGGCCATTGAACGATACGTGGTCGTCTGTAAGCCCATGAGCAACTTCC +GATTTAGTGAGAACCATGCCGTCATGGGCGTAGCGTTCACCTGGATAATGGCTTTGTCCTGTGCTGTTCCTCCACTCCTT +GGATGGTCCAGGTACATCCCCGAGGGCATGCAGTGCTCCTGCGGAGTCGACTACTACACCCTGAAGCCCGAGGTCAACAA +CGAGTCCTTCGTCATCTACATGTTCGTCGTCCACTTCACCATCCCCCTGATTATCATTTTCTTCTGCTATGGCCGCCTGG +TGTGCACTGTGAAAGAGGCTGCAGCTCAACAGCAAGAGTCCGCCACCACCCAGAAGGCCGAGAAAGAGGTGACCAGGATG +GTGATCATCATGGTGGTCTTCTTCCTTATCTGTTGGGTCCCCTACGCCTCTGTCGCTTTCTTCATCTTCAGCAATCAGGG +CTCTGAGTTCGGCCCCATCTTCATGACCGTCCCAGCTTTCTTTGCCAAGAGTTCTTCCATCTACAACCCCGTCATCTACA +TCATGCTCAACAAGCAGTTCCGTAACTGCATGATCACCACCCTGTGCTGCGGCAAGAATCCCTTTGGAGAAGACGATGCC +TCCTCTGCCGCCACCTCCAAGACAGAGGCTTCTTCTGTTTCTTCCAGCCAGGTGTCTCCTGCATAAGACCTTCCACCAGG +CCTGTCTCAGGGTCCGCTGCCTCACACAGCTCCCACCGCCCCAACTCCGTCTCCTGCTCGCTAAGGCGGCGAAGTTCCCC +TTCCATTACATAAAACGTATCTGTTCAAGAAAGGCGACGACGAAGGAGAAGAAGAGGAGCCCCCCCGAACCCCTTCGCTG +CTGCTGAAAACGACTTGATTGCTTCTGCAACGCAACGGGGCCTTACGGCAGCGAAGGGGTTGTCATCCGGACGCGCCAAG +AATTCCTTCGAGACTGTAAATATCTTAAAGGAACCGTCCTGCTAGTTACCGACGCCGCTCCTGTAGCCGCCGTTCCCCCG +CACTCCGGCCGGTTCATACCTCTTATTTTTTTGCAATGCAACAGAAAATAATATTTTTGTTCCCACGGCTTTTCCCGGTC +AGGTCTGGTAGTGGCGGAGATTGGCCGACCCCTCGCACCTGTAATAAAGCGCAG >gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds -GTGCCCTTCTCCAACAAGACAGGCGTGGTGCGCAGTCCCTTCGAGCATCCACAGTACTACCTGGCCGAGC 
-CATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTCGGCTTCCCCATCAACTTCCT -CACGCTCTATGTCACGGTTCAGCACAAGAAGCTGCGTACGCCTCTCAACTACATCCTGCTCAACCTGGCC -GTGGCCGACCTCTTCATGGTCTTCGGAGGCTTCACCACCACCCTCTACACCTCCCTGCATGGATACTTTG -TCTTCGGGCCTACGGGATGCAATCTGGAGGGCTTTTTTGCCACCCTGGGAGGTATGAGCTGAGATGCGGG -TAAGGAGGAGGCATAGAGGCATCTGGGAACAGTCCCAAGCTTGGGGTGAAGGCTAAGAGGCCTTCTTCCT -TGTTCTGTCATTGGCGTCGTCCGAAGCCCTCACTTAATCAACAAACAGTTTGGTGGTGAGGCGCTGAGCT -CCATTTGGAGAGGGCAGGTATCGAGCACTGTTTTATCCCCCCTGGAGTGGTGCCATTGCCTTGCTTTACA -GCAAAGAAACTGAGGATGAGAGGAGTCGAGGGTCTTGCCAGGTCACATCATGGCAGAGACAGAGCTGAGT -TTCAACCCTGCATCTATGTGCAGTTTCCCTTGGAGCAGCTATGTTAGGTCAGACCCACGGTGGGCACTGG -GGAGAGAGCTGCACAAGACAGGTCCCTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTCCTGATTGCCA -GGAGTGATGTGCAGCGCAAATGTCTGAATTCCATTATTATGTGCTCCTTCTTCCTCTGAGCCAAACATCC -ATCTTCATGGCTCCTAGAATTGGGTCCCACCCACATGAGCAGGTCATTTTGTTTCCCTAGAGGGGAGAGG -TCACTGCTGTGGAGGGAGGGAAGGTTCGTCCCGCTCCATGTTTCTGTTGTCTCTGCAATGCCTTTCTCTA -GGGACTCTGCCTATTGCCCCAAGAAGGACACATTCTTCTGTAAAAACTCCCTCCTGGGTTCCCAGTCTAA -TCAAGACCTCTAAACTGATTTCCATGTCCCTCATGAACCCAAAGCTCTAACTGAATTAAACTTCTCAGGA -CTTACTCCACTCTCCTCGTCCATCATGCAGCCCCTCTGCCCAGCACCCTATCTCCTCTTCTTCCCAGTGT -CTGAGCCCACTGTACCCTGAGACTTCGCTCCAGGCCTGCCCCAGGCTGCCTTCTCAGGTGCCCTCTCCCA -CATAGGAGGAGCACGGCCTCCTTAGACAGACGTGGGGTGCAGGTTGGTGGCATGCTGACTGATAGCTGAC -TGCCTTGCAGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTATGCA -AGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCCTTGCCCTCACCTGGGTCATGGC -ACTGGCCTGCGCCGCGCCCCCGCTAGTCGGCTGGTCCAGGTAATGGCACTGAACAGAAGGGAAGTGCCTC -TGAGGTCTTCTTAGGGTCCCCCAGCTGGGACTCAAACCTAGGGCTGTCTGGTTCCAGGCACGGAACTGGC -GACTCCACTGGGGTTGGGGTTTAGGGCAAGGAAGGAGAGGATCAGACCCTAATGTTGTTACGTGGGTTGG -TCCGCATGTCAAGGAGAATCCAAGACACCCAATCCTTCACCTTGGCTGTGCCCCTAATCCTCATCTAAGC -CAGGTTCAGATTCCAATCCTCTTTGGCCCAGTGCTCCGTGGGAAGCTCCCTCTGACCTTGGGCCTCAGCG -CCTGGGGTTGCTGAGCCTTCCTAGTATAGGTGGTGACATCGTAGCCCCTGGGACCTGGATCCTGCCCAGT 
-CTGCAGGCCATCATCTCCAAATGGGGCTGAGATGAGATGTGAGGAAAGAGGGGAGACAGTGGTTTGGAAA -ACTGGACTGGTGGCTTTTTTGGGTTTCCAGAGGACTCATCTTCCTCTGCTTCTAGAATATTCCCACTCTC -TCTTCCCTTTCCTCATTCTTCCTGGGTTATTTTTTTTTCCCTTTGCTGAATTCGAGCCCCATTCCCTCCA -GCCTCTTTCCCTGTCTTATCTAGCCCAGTCCAGTTATATTCTCATAGGCAGAGGCAACAGATGCTCCAAA -TTTTCTGAGGTCGGTTCCAACATCGCCACCCTCTAAAATCAGTGAAACATCCTAACTACATGCCTCATAG -TCCTCCTGTTTCCAAAAACTGCAAAGATCTCCTGGTTACCCTGTATGCCCATCTTTGGGCTAGAAAATCC -TCTCACCCTGTTAATAGTAAGACCCTGGTTTGTACAAACTGCCTCAAACACAGAGTTTAGGGGCTTTTCC -CTTCTCTCCGCCAACCTCTGACAGGCAGAGTCTGAGGCCTGGCCTCCAGCTGCTGCGGGGAGCAGGTCTG -GTAAAGAATCCTGTGCAGGTCAGTGGTATACAGGTCCTGTCAGGTGACAGCCTGGGCGAGAGACTGGAAA -GTATCAGGATAACACGGCTGCCAGACGAACAACAAAACAACACTGAATTCACAAGGCGCATTCGAATCCT -CTCTCAGTCCATTTGATCCTCAGTCACACAGCCGAGTAGACACTTTATCAACTCATTTAACAGAAAGGGA -AAGTGAAGCCCAGAGCGAGGCCAGCAACGTGGCAGGTCACTCTGGTCATCTAGGGCCTGTTCCCAACTCT -TTCACATGTGGGTCTCCAATATGTTCCCTCCTGTCCCAATCTCTGCCGGCCCTCAGGTACATCCCAGAGG -GCATGCAGTGCTCATGTGGAATCGACTACTACACCCTCAAGCCGGAGGTCAACAACGAGTCCTTTGTCAT -CTACATGTTCGTGGTCCACTTCACCATCCCTATGATTGTCATATTCTTTTGCTATGGACAGCTGGTCTTC -ACCGTCAAGGAGGTAAGGTCATGTGTTGGGCACTGGGGACATGCACACTGAGTGAATGGAGCCCAGCTCC -ATTCCCAGAGTTGCCACAGTCTGGACACCTGACCTTGTGTCCCTGCAGGCAGCTGCCCAGCAGCAGGAGT -CAGCCACCACCCAGAAGGCCGAGAAGGAGGTCACCCGTATGGTCATCATCATGGTCATTGCTTTCCTAAT -CTGTTGGCTGCCGTATGCCGGCGTGGCATTCTACATCTTCACCCACCAGGGCTCTAACTTTGGCCCCATC -TTCATGACCCTCCCGGCATTCTTTGCCAAGTCGTCCTCCATCTACAACCCTGTCATCTATATCATGATGA -ACAAGCAGGTGCCAGGTGGTAGGGAGGGAGGGTCTGGGTCCCCCAGGCTGCAGGCACTGCCCACAGAGGA -CAAGCCACATCCTTGACTAGGCAGACCCCAGTCTTCCCATCTGCAAAATTAGGCAGGGGAGTTCGTCTCC -CCCAGGCATCAGAGACATCGGGGAGAAATGCACATTTCTGGAGATGAATCAGCATCTCAGGGTGGGCCCA -GGAACCTGCACTTCTAAAAACCATTCCACATGACTCTGAGGCTAGCATGAGAAGTGATGATCCACATGGT -TCTGGAGGCCTGCTTTAAAAGTCAAGTGGTCAAAGTCCCAAGCCTGGGAACGGGATGGTGCCAGTCTCCA -TTAAAGAGATCAAAAGGAGCTAGAAAGTCTTGTGATGAAAGATGAAGGGATAAAGCCGTCCTTTAACACA -GATCAGTGATTTCTCTGCAGAATCCATGACCCAGTGGGAAAAAGTGGTCCCTGGAGTCAGGCATATTGGA 
-TTCAAATCCTAGCTCTGCTATTTTCTAGCTATGTAACCTTGGGCAAGTCATCTCCCTTCTCTGTGCTTCA -GTTTCTTCTTTCATAGAAAGGGTAAAATCCCAAACTCTTGGGTTAAATGAGATAACTTACATAGCCCTTG -ATATGCAGAGGCATTATGGAATGTCGTTAGTGACAAAGTTCCCTTGGGTTTGGTCCCTGGTATCTCTGGA -GTGAGATTGCATATGTTCCCTTCAGAGGGTCAGATTTGGGATGAGAGTGGAGGCTGCGAGGGCCTGAGTG -GGAAGGGATTGGAGGCAAATCTCACCAACCATGTCAGTTTGCTACACACACTTTGGGTGGACCCTGACCC -TGACTCATGCTTCTTGCCTTCCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCCAC -TGGGTGACGATGAGGCCTCCACCACTGCCTC - +GTGCCCTTCTCCAACAAGACAGGCGTGGTGCGCAGTCCCTTCGAGCATCCACAGTACTACCTGGCCGAGCCATGGCAGTT +CTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTCGGCTTCCCCATCAACTTCCTCACGCTCTATGTCACGGTTC +AGCACAAGAAGCTGCGTACGCCTCTCAACTACATCCTGCTCAACCTGGCCGTGGCCGACCTCTTCATGGTCTTCGGAGGC +TTCACCACCACCCTCTACACCTCCCTGCATGGATACTTTGTCTTCGGGCCTACGGGATGCAATCTGGAGGGCTTTTTTGC +CACCCTGGGAGGTATGAGCTGAGATGCGGGTAAGGAGGAGGCATAGAGGCATCTGGGAACAGTCCCAAGCTTGGGGTGAA +GGCTAAGAGGCCTTCTTCCTTGTTCTGTCATTGGCGTCGTCCGAAGCCCTCACTTAATCAACAAACAGTTTGGTGGTGAG +GCGCTGAGCTCCATTTGGAGAGGGCAGGTATCGAGCACTGTTTTATCCCCCCTGGAGTGGTGCCATTGCCTTGCTTTACA +GCAAAGAAACTGAGGATGAGAGGAGTCGAGGGTCTTGCCAGGTCACATCATGGCAGAGACAGAGCTGAGTTTCAACCCTG +CATCTATGTGCAGTTTCCCTTGGAGCAGCTATGTTAGGTCAGACCCACGGTGGGCACTGGGGAGAGAGCTGCACAAGACA +GGTCCCTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNTTCCTGATTGCCAGGAGTGATGTGCAGCGCAAATGTCTGAATTCCATTATTAT +GTGCTCCTTCTTCCTCTGAGCCAAACATCCATCTTCATGGCTCCTAGAATTGGGTCCCACCCACATGAGCAGGTCATTTT +GTTTCCCTAGAGGGGAGAGGTCACTGCTGTGGAGGGAGGGAAGGTTCGTCCCGCTCCATGTTTCTGTTGTCTCTGCAATG +CCTTTCTCTAGGGACTCTGCCTATTGCCCCAAGAAGGACACATTCTTCTGTAAAAACTCCCTCCTGGGTTCCCAGTCTAA +TCAAGACCTCTAAACTGATTTCCATGTCCCTCATGAACCCAAAGCTCTAACTGAATTAAACTTCTCAGGACTTACTCCAC +TCTCCTCGTCCATCATGCAGCCCCTCTGCCCAGCACCCTATCTCCTCTTCTTCCCAGTGTCTGAGCCCACTGTACCCTGA +GACTTCGCTCCAGGCCTGCCCCAGGCTGCCTTCTCAGGTGCCCTCTCCCACATAGGAGGAGCACGGCCTCCTTAGACAGA +CGTGGGGTGCAGGTTGGTGGCATGCTGACTGATAGCTGACTGCCTTGCAGGTGAAATTGCCCTGTGGTCCTTGGTGGTCC 
+TGGCCATCGAGCGGTACGTGGTGGTATGCAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCCTT +GCCCTCACCTGGGTCATGGCACTGGCCTGCGCCGCGCCCCCGCTAGTCGGCTGGTCCAGGTAATGGCACTGAACAGAAGG +GAAGTGCCTCTGAGGTCTTCTTAGGGTCCCCCAGCTGGGACTCAAACCTAGGGCTGTCTGGTTCCAGGCACGGAACTGGC +GACTCCACTGGGGTTGGGGTTTAGGGCAAGGAAGGAGAGGATCAGACCCTAATGTTGTTACGTGGGTTGGTCCGCATGTC +AAGGAGAATCCAAGACACCCAATCCTTCACCTTGGCTGTGCCCCTAATCCTCATCTAAGCCAGGTTCAGATTCCAATCCT +CTTTGGCCCAGTGCTCCGTGGGAAGCTCCCTCTGACCTTGGGCCTCAGCGCCTGGGGTTGCTGAGCCTTCCTAGTATAGG +TGGTGACATCGTAGCCCCTGGGACCTGGATCCTGCCCAGTCTGCAGGCCATCATCTCCAAATGGGGCTGAGATGAGATGT +GAGGAAAGAGGGGAGACAGTGGTTTGGAAAACTGGACTGGTGGCTTTTTTGGGTTTCCAGAGGACTCATCTTCCTCTGCT +TCTAGAATATTCCCACTCTCTCTTCCCTTTCCTCATTCTTCCTGGGTTATTTTTTTTTCCCTTTGCTGAATTCGAGCCCC +ATTCCCTCCAGCCTCTTTCCCTGTCTTATCTAGCCCAGTCCAGTTATATTCTCATAGGCAGAGGCAACAGATGCTCCAAA +TTTTCTGAGGTCGGTTCCAACATCGCCACCCTCTAAAATCAGTGAAACATCCTAACTACATGCCTCATAGTCCTCCTGTT +TCCAAAAACTGCAAAGATCTCCTGGTTACCCTGTATGCCCATCTTTGGGCTAGAAAATCCTCTCACCCTGTTAATAGTAA +GACCCTGGTTTGTACAAACTGCCTCAAACACAGAGTTTAGGGGCTTTTCCCTTCTCTCCGCCAACCTCTGACAGGCAGAG +TCTGAGGCCTGGCCTCCAGCTGCTGCGGGGAGCAGGTCTGGTAAAGAATCCTGTGCAGGTCAGTGGTATACAGGTCCTGT +CAGGTGACAGCCTGGGCGAGAGACTGGAAAGTATCAGGATAACACGGCTGCCAGACGAACAACAAAACAACACTGAATTC +ACAAGGCGCATTCGAATCCTCTCTCAGTCCATTTGATCCTCAGTCACACAGCCGAGTAGACACTTTATCAACTCATTTAA +CAGAAAGGGAAAGTGAAGCCCAGAGCGAGGCCAGCAACGTGGCAGGTCACTCTGGTCATCTAGGGCCTGTTCCCAACTCT +TTCACATGTGGGTCTCCAATATGTTCCCTCCTGTCCCAATCTCTGCCGGCCCTCAGGTACATCCCAGAGGGCATGCAGTG +CTCATGTGGAATCGACTACTACACCCTCAAGCCGGAGGTCAACAACGAGTCCTTTGTCATCTACATGTTCGTGGTCCACT +TCACCATCCCTATGATTGTCATATTCTTTTGCTATGGACAGCTGGTCTTCACCGTCAAGGAGGTAAGGTCATGTGTTGGG +CACTGGGGACATGCACACTGAGTGAATGGAGCCCAGCTCCATTCCCAGAGTTGCCACAGTCTGGACACCTGACCTTGTGT +CCCTGCAGGCAGCTGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCCGAGAAGGAGGTCACCCGTATGGTCATCATC +ATGGTCATTGCTTTCCTAATCTGTTGGCTGCCGTATGCCGGCGTGGCATTCTACATCTTCACCCACCAGGGCTCTAACTT +TGGCCCCATCTTCATGACCCTCCCGGCATTCTTTGCCAAGTCGTCCTCCATCTACAACCCTGTCATCTATATCATGATGA 
+ACAAGCAGGTGCCAGGTGGTAGGGAGGGAGGGTCTGGGTCCCCCAGGCTGCAGGCACTGCCCACAGAGGACAAGCCACAT +CCTTGACTAGGCAGACCCCAGTCTTCCCATCTGCAAAATTAGGCAGGGGAGTTCGTCTCCCCCAGGCATCAGAGACATCG +GGGAGAAATGCACATTTCTGGAGATGAATCAGCATCTCAGGGTGGGCCCAGGAACCTGCACTTCTAAAAACCATTCCACA +TGACTCTGAGGCTAGCATGAGAAGTGATGATCCACATGGTTCTGGAGGCCTGCTTTAAAAGTCAAGTGGTCAAAGTCCCA +AGCCTGGGAACGGGATGGTGCCAGTCTCCATTAAAGAGATCAAAAGGAGCTAGAAAGTCTTGTGATGAAAGATGAAGGGA +TAAAGCCGTCCTTTAACACAGATCAGTGATTTCTCTGCAGAATCCATGACCCAGTGGGAAAAAGTGGTCCCTGGAGTCAG +GCATATTGGATTCAAATCCTAGCTCTGCTATTTTCTAGCTATGTAACCTTGGGCAAGTCATCTCCCTTCTCTGTGCTTCA +GTTTCTTCTTTCATAGAAAGGGTAAAATCCCAAACTCTTGGGTTAAATGAGATAACTTACATAGCCCTTGATATGCAGAG +GCATTATGGAATGTCGTTAGTGACAAAGTTCCCTTGGGTTTGGTCCCTGGTATCTCTGGAGTGAGATTGCATATGTTCCC +TTCAGAGGGTCAGATTTGGGATGAGAGTGGAGGCTGCGAGGGCCTGAGTGGGAAGGGATTGGAGGCAAATCTCACCAACC +ATGTCAGTTTGCTACACACACTTTGGGTGGACCCTGACCCTGACTCATGCTTCTTGCCTTCCAGTTCCGGAACTGCATGC +TCACTACCCTCTGCTGTGGCAAGAACCCACTGGGTGACGATGAGGCCTCCACCACTGCCTC >gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds -GTGCCCTTCTCCAACAAGACGGGTGTGGTGCGCAGCCCCTTCGAGTACCCGCAGTACTACCTGGCTGAGC -CCTGGCAGTTCTCCATGCTGGCTGCCTACATGTTTCTGCTGATCGTGCTCGGATTCCCCATCAACTTCCT -CACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTGGCT -GTGGCCAACCTCTTCATGGTCTTTGGAGGCTTCACCACCACCCTGTATACCTCTATGCATGGATACTTCG -TCTTCGGGGCCACGGGATGCAATCTGGAGGGCTTCTTTGCCACGCTGGGCGGTGAAATCGCCCTGTGGTC -CCTGGTGGTCCTGGCCATCGAGCGGTATGTGGTGGTCTGCAAGCCCATGAGCAACTTCCGCTTTGGGGAG -AACCACGCCATCATGGGCCTCGCCTTCACGTGGGTCATGGCACTGGCCTGCGCTGCACCCCCACTAGCCG -GCTGGTCCAGGTACATCCCAGAGGGCATGCAGTGCTCGTGTGGGATTGACTACTACACGCTCAAACCGGA -GGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTGTCATTTTC -TTCTGCTACGGACAGCTGGTGTTCACAGTGAAGGAGGCGGCTGCCCAGCAGCAGGAGTCAGCCACCACCC -AGAAGGCCGAGAAGGAAGTCACGCGCATGGTCATCATCATGGTCGTTGCGTTCCTAATCTGTTGGCTGCC -CTACGCCAGCGTGGCATTCTACATCTTTACCCACCAGGGCTCTAACTTTGGCCCTGTCTTCATGACCATC -CCGGCATTCTTCGCCAAGTCATCCTCCATCTACAACCCGGTCATCTATATCATGATGAACAAGCAGTTCC 
-GGAACTGCATGCTCACCACCCTCTGCTGTGGCAAGAACCCACTGGGTGATGACGAAGCATCCACCACTGC -CTC - +GTGCCCTTCTCCAACAAGACGGGTGTGGTGCGCAGCCCCTTCGAGTACCCGCAGTACTACCTGGCTGAGCCCTGGCAGTT +CTCCATGCTGGCTGCCTACATGTTTCTGCTGATCGTGCTCGGATTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCC +AGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTGGCTGTGGCCAACCTCTTCATGGTCTTTGGAGGC +TTCACCACCACCCTGTATACCTCTATGCATGGATACTTCGTCTTCGGGGCCACGGGATGCAATCTGGAGGGCTTCTTTGC +CACGCTGGGCGGTGAAATCGCCCTGTGGTCCCTGGTGGTCCTGGCCATCGAGCGGTATGTGGTGGTCTGCAAGCCCATGA +GCAACTTCCGCTTTGGGGAGAACCACGCCATCATGGGCCTCGCCTTCACGTGGGTCATGGCACTGGCCTGCGCTGCACCC +CCACTAGCCGGCTGGTCCAGGTACATCCCAGAGGGCATGCAGTGCTCGTGTGGGATTGACTACTACACGCTCAAACCGGA +GGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTGTCATTTTCTTCTGCTACG +GACAGCTGGTGTTCACAGTGAAGGAGGCGGCTGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCCGAGAAGGAAGTC +ACGCGCATGGTCATCATCATGGTCGTTGCGTTCCTAATCTGTTGGCTGCCCTACGCCAGCGTGGCATTCTACATCTTTAC +CCACCAGGGCTCTAACTTTGGCCCTGTCTTCATGACCATCCCGGCATTCTTCGCCAAGTCATCCTCCATCTACAACCCGG +TCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCCTCTGCTGTGGCAAGAACCCACTGGGTGAT +GACGAAGCATCCACCACTGCCTC >gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds -ATGAACGGGACCGAGGGCCCAAACTTCTACGTGCCTTTCTCCAACAAGACGGGCGTCGTACGCAGCCCCT -TCGAGGCGCCGCAGTACTACCTGGCTGAGCCATGGCAGTTCAGCATGCTGGCCGCCTACATGTTCCTGCT -GATCATGCTTGGCTTCCCCATCAACTTCCTCACGCTGTACGTCACAGTCCAGCACAAGAAGCTGAGGACC -CCCCTCAACTACATCCTGCTCAACCTGGCCGTGGCAGATCTCTTCATGGTGTTCGGGGGCTTCACCACCA -CCCTGTATACCTCTCTGCACGGGTACTTCGTGTTCGGTCCGACGGGCTGCAACCTCGAGGGCTTCTTTGC -CACCTTAGGCGGTGAAATTGCACTGTGGTCCTTGGTGGTGCTAGCCATCGAGCGGTACGTAGTGGTGTGC -AAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCGTCGCATTCACCTGGGTCATGG -CTCTGGCCTGTGCGGCCCCCCCCCTCGTCGGCTGGTCTAGATACATCCCGGAGGGGATGCAGTGCTCGTG -CGGGATCGATTACTACACGCCCCACGAGGAGACCAACAATGAGTCGTTCGTCATCTACATGTTCGTTGTA -CACTTCATCATCCCCCTGATTGTCATATTCTTCTGCTACGGGCAGCTGGTCTTCACCGTCAAGGAGGCTG -CAGCCCAGCAGCAGGAGTCGGCCACCACTCAGAAGGCCGAGAAGGAGGTCACGCGTATGGTCATCATCAT 
-GGTCATCGCTTTCCTCATATGCTGGCTGCCCTACGCAGGTGTGGCGTTCTACATCTTCACCCATCAGGGA -TCCGACTTTGGCCCCATCTTCATGACCATCCCGGCTTTCTTTGCCAAGACGTCTGCCGTCTATAACCCCG -TCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGGTCACCACTCTCTGCTGTGGCAAGAACCC -CCTAGGTGACGACGAGGCCTCCACGACCGTGTCCAAGACAGAGACCAGCCAAGTGGCCCCTGCCTAA - +ATGAACGGGACCGAGGGCCCAAACTTCTACGTGCCTTTCTCCAACAAGACGGGCGTCGTACGCAGCCCCTTCGAGGCGCC +GCAGTACTACCTGGCTGAGCCATGGCAGTTCAGCATGCTGGCCGCCTACATGTTCCTGCTGATCATGCTTGGCTTCCCCA +TCAACTTCCTCACGCTGTACGTCACAGTCCAGCACAAGAAGCTGAGGACCCCCCTCAACTACATCCTGCTCAACCTGGCC +GTGGCAGATCTCTTCATGGTGTTCGGGGGCTTCACCACCACCCTGTATACCTCTCTGCACGGGTACTTCGTGTTCGGTCC +GACGGGCTGCAACCTCGAGGGCTTCTTTGCCACCTTAGGCGGTGAAATTGCACTGTGGTCCTTGGTGGTGCTAGCCATCG +AGCGGTACGTAGTGGTGTGCAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCGTCGCATTCACC +TGGGTCATGGCTCTGGCCTGTGCGGCCCCCCCCCTCGTCGGCTGGTCTAGATACATCCCGGAGGGGATGCAGTGCTCGTG +CGGGATCGATTACTACACGCCCCACGAGGAGACCAACAATGAGTCGTTCGTCATCTACATGTTCGTTGTACACTTCATCA +TCCCCCTGATTGTCATATTCTTCTGCTACGGGCAGCTGGTCTTCACCGTCAAGGAGGCTGCAGCCCAGCAGCAGGAGTCG +GCCACCACTCAGAAGGCCGAGAAGGAGGTCACGCGTATGGTCATCATCATGGTCATCGCTTTCCTCATATGCTGGCTGCC +CTACGCAGGTGTGGCGTTCTACATCTTCACCCATCAGGGATCCGACTTTGGCCCCATCTTCATGACCATCCCGGCTTTCT +TTGCCAAGACGTCTGCCGTCTATAACCCCGTCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGGTCACCACT +CTCTGCTGTGGCAAGAACCCCCTAGGTGACGACGAGGCCTCCACGACCGTGTCCAAGACAGAGACCAGCCAAGTGGCCCC +TGCCTAA >gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds -CCGCTACTGACGAACCGCAACCATGAACGGCACTGAGGGACCTAACTTCTACATCCCCATGTCAAACGCC -ACTGGTGTAGTGAGGAGTCCATTTGAATACCCGCAGTACTACCTTGCAGAACCATGGGCTTTCTCAGCTC -TGTCTGCCTACATGTTCTTCCTGATTATCGCCGGATTCCCCATCAACTTCCTCACCCTGTATGTCACCAT -CGAACATAAGAAACTGAGGACCCCACTGAACTACATTCTGCTGAACCTGGCCGTGGCCGACCTCTTCATG -GTGTTTGGCGGATTCACCACCACGATGTACACCTCCATGCACGGCTACTTTGTCTTCGGCCCCACCGGCT -GCAACATCGAAGGGTTCTTCGCCACCCTCGGCGGCGAGATTGCCCTCTGGTGCCTCGTTGTCCTGGCCAT -TGAAAGGTGGATGGTCGTCTGCAAGCCAGTGACCAATTTCCGCTTCGGTGAGAGCCATGCCATCATGGGT 
-GTCATGGTGACCTGGACCATGGCATTGGCCTGTGCCCTCCCCCCTCTCTTCGGCTGGTCTCGGTACATTC -CGGAAGGTCTGCAGTGCTCGTGCGGGATCGACTACTATACCCGGGCGCCTGGGATCAACAATGAGTCCTT -TGTGATCTACATGTTTACCTGCCACTTCTCCATCCCACTCGCCGTCATCTCTTTCTGCTACGGCCGACTG -GTGTGCACCGTCAAAGAGGCCGCTGCCCAGCAACAGGAGTCCGAGACCACCCAGAGGGCTGAGCGGGAGG -TCACCCGCATGGTCGTCATCATGGTCATCTCCTTCCTGGTCTGCTGGGTGCCCTATGCCAGTGTGGCCTG -GTACATCTTTACCCACCAGGGAAGCACTTTTGGGCCCATCTTCATGACCATTCCATCCTTCTTTGCCAAG -AGTTCAGCCCTCTACAACCCCATGATCTACATCTGCATGAACAAGCAGTTCCGCCATTGCATGATCACCA -CCCTCTGCTGTGGGAAGAACCCCTTCGAGGAGGAGGATGGAGCGTCCGCCACTAGCTCTAAAACTGAGGC -TTCATCCGTGTCCTCCAGCTCTGTCTCCCCGGCATAAACCTTGTTTGACCGAACACCACGCATCAACACA -AAGACCAAGAATGCTGACTAAATGCTAACATTTCAGGGAAATCCAAAGACTTTTTACTATTTTTTTACAC -AACCATATAGGTTGCAAACAGAGGTTTAGCCCTGTTTACAGGTTGTCATCAATGTGATGTCAGTATGTAC -AATATAGTCAACTTGATAGCAAGTTGTTGGCTTATTTCAGATTGTATGGGCAATGTAATCAACCATATGT -GAAATAAATTGCAA +CCGCTACTGACGAACCGCAACCATGAACGGCACTGAGGGACCTAACTTCTACATCCCCATGTCAAACGCCACTGGTGTAG +TGAGGAGTCCATTTGAATACCCGCAGTACTACCTTGCAGAACCATGGGCTTTCTCAGCTCTGTCTGCCTACATGTTCTTC +CTGATTATCGCCGGATTCCCCATCAACTTCCTCACCCTGTATGTCACCATCGAACATAAGAAACTGAGGACCCCACTGAA +CTACATTCTGCTGAACCTGGCCGTGGCCGACCTCTTCATGGTGTTTGGCGGATTCACCACCACGATGTACACCTCCATGC +ACGGCTACTTTGTCTTCGGCCCCACCGGCTGCAACATCGAAGGGTTCTTCGCCACCCTCGGCGGCGAGATTGCCCTCTGG +TGCCTCGTTGTCCTGGCCATTGAAAGGTGGATGGTCGTCTGCAAGCCAGTGACCAATTTCCGCTTCGGTGAGAGCCATGC +CATCATGGGTGTCATGGTGACCTGGACCATGGCATTGGCCTGTGCCCTCCCCCCTCTCTTCGGCTGGTCTCGGTACATTC +CGGAAGGTCTGCAGTGCTCGTGCGGGATCGACTACTATACCCGGGCGCCTGGGATCAACAATGAGTCCTTTGTGATCTAC +ATGTTTACCTGCCACTTCTCCATCCCACTCGCCGTCATCTCTTTCTGCTACGGCCGACTGGTGTGCACCGTCAAAGAGGC +CGCTGCCCAGCAACAGGAGTCCGAGACCACCCAGAGGGCTGAGCGGGAGGTCACCCGCATGGTCGTCATCATGGTCATCT +CCTTCCTGGTCTGCTGGGTGCCCTATGCCAGTGTGGCCTGGTACATCTTTACCCACCAGGGAAGCACTTTTGGGCCCATC +TTCATGACCATTCCATCCTTCTTTGCCAAGAGTTCAGCCCTCTACAACCCCATGATCTACATCTGCATGAACAAGCAGTT +CCGCCATTGCATGATCACCACCCTCTGCTGTGGGAAGAACCCCTTCGAGGAGGAGGATGGAGCGTCCGCCACTAGCTCTA 
+AAACTGAGGCTTCATCCGTGTCCTCCAGCTCTGTCTCCCCGGCATAAACCTTGTTTGACCGAACACCACGCATCAACACA +AAGACCAAGAATGCTGACTAAATGCTAACATTTCAGGGAAATCCAAAGACTTTTTACTATTTTTTTACACAACCATATAG +GTTGCAAACAGAGGTTTAGCCCTGTTTACAGGTTGTCATCAATGTGATGTCAGTATGTACAATATAGTCAACTTGATAGC +AAGTTGTTGGCTTATTTCAGATTGTATGGGCAATGTAATCAACCATATGTGAAATAAATTGCAA
--- a/tools/blast_rbh/README.rst Mon Sep 07 04:40:51 2015 -0400 +++ b/tools/blast_rbh/README.rst Wed Apr 19 07:44:47 2017 -0400 @@ -1,7 +1,7 @@ Find BLAST Reciprocal Best Hits (RBH), with Galaxy wrapper ========================================================== -This tool is copyright 2011-2015 by Peter Cock, The James Hutton Institute +This tool is copyright 2011-2017 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. See the licence text below. @@ -84,7 +84,13 @@ v0.1.7 - Reorder XML elements (internal change only). - Planemo for Tool Shed upload (``.shed.yml``, internal change only). - Updated citation information with GigaScience paper. -v0.1.8 - Updated to depend on NCBI BLAST+ 2.2.31 via ToolShed install. +v0.1.8 - Updated to depend on NCBI BLAST+ 2.2.31 via ToolShed install. +v0.1.9 - Updates to the command line API for the Python script. + - PEP8 style updates to the Python script (internal change only). + - Fix parameter help text which was not being displayed. +v0.1.11 - Updated to depend on NCBI BLAST+ 2.5.0 via ToolShed or BioConda. + - Update Biopython dependency. + - Tweak Python script to work under Python 2 or Python 3. ======= ====================================================================== @@ -98,17 +104,17 @@ Planemo commands (which requires you have set your Tool Shed access details in ``~/.planemo.yml`` and that you have access rights on the Tool Shed):: - $ planemo shed_update -t testtoolshed --check_diff ~/repositories/galaxy_blast/tools/blast_rbh/ + $ planemo shed_update -t testtoolshed --check_diff tools/blast_rbh/ ... or:: - $ planemo shed_update -t toolshed --check_diff ~/repositories/galaxy_blast/tools/blast_rbh/ + $ planemo shed_update -t toolshed --check_diff tools/blast_rbh/ ... To just build and check the tar ball, use:: - $ planemo shed_upload --tar_only ~/repositories/galaxy_blast/tools/blast_rbh/ + $ planemo shed_upload --tar_only tools/blast_rbh/ ... 
$ tar -tzf shed_upload.tar.gz test-data/four_human_proteins.fasta
--- a/tools/blast_rbh/blast_rbh.py Mon Sep 07 04:40:51 2015 -0400 +++ b/tools/blast_rbh/blast_rbh.py Wed Apr 19 07:44:47 2017 -0400 @@ -2,7 +2,7 @@ """BLAST Reciprocal Best Hit (RBH) from two FASTA input files. Run "blast_rbh.py -h" to see the help text, or read the associated -README.rst file which is also available on GitHub at: +blast_rbh.xml and README.rst files which are available on GitHub at: https://github.com/peterjc/galaxy_blast/tree/master/tools/blast_rbh This requires Python and the NCBI BLAST+ tools to be installed @@ -18,60 +18,78 @@ # results, rather than doing minimum HSP coverage in Python. # [Not doing this right now as would break on older BLAST+] +from __future__ import print_function + import os +import shutil import sys import tempfile -import shutil + from optparse import OptionParser -def stop_err( msg ): - sys.stderr.write("%s\n" % msg) - sys.exit(1) def run(cmd): return_code = os.system(cmd) if return_code: - stop_err("Error %i from: %s" % (return_code, cmd)) + sys.exit("Error %i from: %s" % (return_code, cmd)) + if "--version" in sys.argv[1:]: - #TODO - Capture version of BLAST+ binaries too? - print "BLAST RBH v0.1.6" + # TODO - Capture version of BLAST+ binaries too? + print("BLAST RBH v0.1.11") sys.exit(0) -#Parse Command Line +try: + threads = int(os.environ.get("GALAXY_SLOTS", "1")) +except ValueError: + threads = 1 +assert 1 <= threads, threads + +# Parse Command Line usage = """Use as follows: $ python blast_rbh.py [options] A.fasta B.fasta + +Many of the options are required. Example with proteins and blastp: + +$ python blast_rbh.py -a prot -t blastp -o output.tsv protA.fasta protB.fasta + +There is additional guidance in the help text in the blast_rbh.xml file, +which is shown to the user via the Galaxy interface to this tool. 
""" parser = OptionParser(usage=usage) parser.add_option("-a", "--alphabet", dest="dbtype", default=None, - help="Alphabet type (nucl or prot)") + help="Alphabet type (nucl or prot; required)") parser.add_option("-t", "--task", dest="task", default=None, - help="BLAST task (e.g. blastp, blastn, megablast)") -parser.add_option("-i","--identity", dest="min_identity", + help="BLAST task (e.g. blastp, blastn, megablast; required)") +parser.add_option("-i", "--identity", dest="min_identity", default="70", help="Minimum percentage identity (optional, default 70)") parser.add_option("-c", "--coverage", dest="min_coverage", default="50", help="Minimum HSP coverage (optional, default 50)") parser.add_option("--nr", dest="nr", default=False, action="store_true", - help="Preprocess FASTA files to collapse identifical " + help="Preprocess FASTA files to collapse identical " "entries (make sequences non-redundant)") parser.add_option("-o", "--output", dest="output", default=None, metavar="FILE", - help="Output filename") + help="Output filename (required)") +parser.add_option("--threads", dest="threads", + default=threads, + help="Number of threads when running BLAST. Defaults to the " + "$GALAXY_SLOTS environment variable if set, or 1.") options, args = parser.parse_args() if len(args) != 2: - stop_err("Expects two input FASTA filenames") + sys.exit("Expects two input FASTA filenames") fasta_a, fasta_b = args if not os.path.isfile(fasta_a): - stop_err("Missing input file for species A: %r" % fasta_a) + sys.exit("Missing input file for species A: %r" % fasta_a) if not os.path.isfile(fasta_b): - stop_err("Missing input file for species B: %r" % fasta_b) + sys.exit("Missing input file for species B: %r" % fasta_b) if os.path.abspath(fasta_a) == os.path.abspath(fasta_b): self_comparison = True print("Doing self comparison; ignoring self matches.") @@ -79,48 +97,49 @@ self_comparison = False if not options.output: - stop_err("Output filename required, e.g. 
-o example.tab") + sys.exit("Output filename required, e.g. -o example.tab") out_file = options.output try: min_identity = float(options.min_identity) except ValueError: - stop_err("Expected number between 0 and 100 for minimum identity, not %r" % min_identity) + sys.exit("Expected number between 0 and 100 for minimum identity, not %r" % min_identity) if not (0 <= min_identity <= 100): - stop_err("Expected minimum identity between 0 and 100, not %0.2f" % min_identity) + sys.exit("Expected minimum identity between 0 and 100, not %0.2f" % min_identity) try: min_coverage = float(options.min_coverage) except ValueError: - stop_err("Expected number between 0 and 100 for minimum coverage, not %r" % min_coverage) + sys.exit("Expected number between 0 and 100 for minimum coverage, not %r" % min_coverage) if not (0 <= min_coverage <= 100): - stop_err("Expected minimum coverage between 0 and 100, not %0.2f" % min_coverage) + sys.exit("Expected minimum coverage between 0 and 100, not %0.2f" % min_coverage) if not options.task: - stop_err("Missing BLAST task, e.g. -t blastp") + sys.exit("Missing BLAST task, e.g. 
-t blastp") blast_type = options.task if not options.dbtype: - stop_err("Missing database type, -a nucl, or -a prot") + sys.exit("Missing database type, -a nucl, or -a prot") dbtype = options.dbtype if dbtype == "nucl": if blast_type in ["megablast", "blastn", "blastn-short", "dc-megablast"]: - blast_cmd = "blastn -task %s" % blast_type + blast_cmd = "blastn -task %s" % blast_type elif blast_type == "tblastx": blast_cmd = "tblastx" else: - stop_err("Invalid BLAST type for BLASTN: %r" % blast_type) + sys.exit("Invalid BLAST type for BLASTN: %r" % blast_type) elif dbtype == "prot": if blast_type not in ["blastp", "blastp-fast", "blastp-short"]: - stop_err("Invalid BLAST type for BLASTP: %r" % blast_type) + sys.exit("Invalid BLAST type for BLASTP: %r" % blast_type) blast_cmd = "blastp -task %s" % blast_type else: - stop_err("Expected 'nucl' or 'prot' for BLAST database type, not %r" % blast_type) + sys.exit("Expected 'nucl' or 'prot' for BLAST database type, not %r" % blast_type) try: - threads = int(os.environ.get("GALAXY_SLOTS", "1")) -except: - threads = 1 -assert 1 <= threads, threads + threads = int(options.threads) +except ValueError: + sys.exit("Expected positive integer for number of threads, not %r" % options.threads) +if threads < 1: + sys.exit("Expected positive integer for number of threads, not %r" % threads) makeblastdb_exe = "makeblastdb" @@ -138,7 +157,7 @@ b_vs_a = os.path.join(base_path, "B_vs_A.tabular") log = os.path.join(base_path, "blast.log") -cols = "qseqid sseqid bitscore pident qcovhsp qlen length" #Or qcovs? +cols = "qseqid sseqid bitscore pident qcovhsp qlen length" # Or qcovs? c_query = 0 c_match = 1 c_score = 2 @@ -149,6 +168,7 @@ tie_warning = 0 + def best_hits(blast_tabular, ignore_self=False): """Iterate over BLAST tabular output, returns best hits as 2-tuples. 
@@ -163,11 +183,18 @@ current = None best_score = None best = None + col_count = len(cols.split()) with open(blast_tabular) as h: for line in h: if line.startswith("#"): continue parts = line.rstrip("\n").split("\t") + if len(parts) != col_count: + # Using NCBI BLAST+ 2.2.27 the undefined field is ignored + # Even NCBI BLAST+ 2.5.0 silently ignores unknown fields :( + sys.exit("Old version of NCBI BLAST? Expected %i columns, got %i:\n%s\n" + "Note the qcovhsp field was only added in version 2.2.28\n" + % (col_count, len(parts), line)) if float(parts[c_identity]) < min_identity or float(parts[c_coverage]) < min_coverage: continue a = parts[c_query] @@ -177,71 +204,75 @@ score = float(parts[c_score]) qlen = int(parts[c_qlen]) length = int(parts[c_length]) - #print("Considering hit for %s to %s with score %s..." % (a, b, score)) + # print("Considering hit for %s to %s with score %s..." % (a, b, score)) if current is None: - #First hit + # First hit assert best is None assert best_score is None best = dict() - #Now append this hit... + # Now append this hit... elif a != current: - #New hit + # New hit if len(best) == 1: - #Unambiguous (no tied matches) + # Unambiguous (no tied matches) yield current, list(best.values())[0] else: - #print("%s has %i equally good hits: %s" % (a, len(best), ", ".join(best))) + # print("%s has %i equally good hits: %s" % (a, len(best), ", ".join(best))) tie_warning += 1 best = dict() - #Now append this hit... + # Now append this hit... elif score < best_score: - #print("No improvement for %s, %s < %s" % (a, score, best_score)) + # print("No improvement for %s, %s < %s" % (a, score, best_score)) continue elif score > best_score: - #This is better, discard old best + # This is better, discard old best best = dict() - #Now append this hit... + # Now append this hit... else: - #print("Tied best hits for %s" % a) + # print("Tied best hits for %s" % a) assert best_score == score - #Now append this hit... + # Now append this hit... 
current = a best_score = score - #This will collapse two equally good hits to the same target (e.g. duplicated domain) + # This will collapse two equally good hits to the same target (e.g. duplicated domain) best[b] = (b, score, parts[c_score], parts[c_identity], parts[c_coverage], qlen, length) - #Best hit for final query, if unambiguous: + # Best hit for final query, if unambiguous: if current is not None: - if len(best)==1: + if len(best) == 1: yield current, list(best.values())[0] else: - #print("%s has %i equally good hits: %s" % (a, len(best), ", ".join(best))) + # print("%s has %i equally good hits: %s" % (a, len(best), ", ".join(best))) tie_warning += 1 + def check_duplicate_ids(filename): # Copied from tools/ncbi_blast_plus/check_no_duplicates.py # TODO - just use Biopython's FASTA parser? if not os.path.isfile(filename): - stop_err("Missing FASTA file %r" % filename, 2) + sys.stderr.write("Missing FASTA file %r\n" % filename) + sys.exit(2) identifiers = set() handle = open(filename) for line in handle: if line.startswith(">"): - # The split will also take care of the new line character, - # e.g. ">test\n" and ">test description here\n" both give "test" + # The split will also take care of the new line character, + # e.g. ">test\n" and ">test description here\n" both give "test" seq_id = line[1:].split(None, 1)[0] if seq_id in identifiers: handle.close() - stop_err("Repeated identifiers, e.g. %r" % seq_id, 3) + sys.stderr.write("Repeated identifiers, e.g. %r\n" % seq_id) + sys.exit(3) identifiers.add(seq_id) handle.close() + def make_nr(input_fasta, output_fasta, sep=";"): - #TODO - seq-hash based to avoid loading everything into RAM? + # TODO - seq-hash based to avoid loading everything into RAM? 
by_seq = dict() try: from Bio import SeqIO except KeyError: - stop_err("Missing Biopython") + sys.exit("Missing Biopython") for record in SeqIO.parse(input_fasta, "fasta"): s = str(record.seq).upper() try: @@ -259,7 +290,7 @@ unique += 1 del by_seq if duplicates: - #TODO - refactor as a generator with single SeqIO.write(...) call + # TODO - refactor as a generator with single SeqIO.write(...) call with open(output_fasta, "w") as handle: for record in SeqIO.parse(input_fasta, "fasta"): if record.id in representatives: @@ -269,12 +300,14 @@ elif record.id in duplicates: continue SeqIO.write(record, handle, "fasta") - print("%i unique entries; removed %i duplicates leaving %i representative records" % (unique, len(duplicates), len(representatives))) + print("%i unique entries; removed %i duplicates leaving %i representative records" + % (unique, len(duplicates), len(representatives))) else: os.symlink(os.path.abspath(input_fasta), output_fasta) print("No perfect duplicates in file, %i unique entries" % unique) -#print("Starting...") + +# print("Starting...") check_duplicate_ids(fasta_a) if not self_comparison: check_duplicate_ids(fasta_b) @@ -286,18 +319,18 @@ fasta_a = tmp_a fasta_b = tmp_b -#TODO - Report log in case of error? +# TODO - Report log in case of error? 
run('%s -dbtype %s -in "%s" -out "%s" -logfile "%s"' % (makeblastdb_exe, dbtype, fasta_a, db_a, log)) if not self_comparison: run('%s -dbtype %s -in "%s" -out "%s" -logfile "%s"' % (makeblastdb_exe, dbtype, fasta_b, db_b, log)) -#print("BLAST databases prepared.") +# print("BLAST databases prepared.") run('%s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i' % (blast_cmd, fasta_a, db_b, a_vs_b, cols, threads)) -#print("BLAST species A vs species B done.") +# print("BLAST species A vs species B done.") if not self_comparison: run('%s -query "%s" -db "%s" -out "%s" -outfmt "6 %s" -num_threads %i' % (blast_cmd, fasta_b, db_a, b_vs_a, cols, threads)) - #print("BLAST species B vs species A done.") + # print("BLAST species B vs species A done.") best_b_vs_a = dict(best_hits(b_vs_a, self_comparison)) @@ -306,19 +339,20 @@ count = 0 outfile = open(out_file, 'w') outfile.write("#A_id\tB_id\tA_length\tB_length\tA_qcovhsp\tB_qcovhsp\tlength\tpident\tbitscore\n") -for a, (b, a_score_float, a_score_str, a_identity_str, a_coverage_str, a_qlen, a_length) in best_hits(a_vs_b, self_comparison): +for a, (b, a_score_float, a_score_str, + a_identity_str, a_coverage_str, a_qlen, a_length) in best_hits(a_vs_b, self_comparison): if b not in best_b_vs_a: - #Match b has no best hit + # Match b has no best hit continue a2, b_score_float, b_score_str, b_identity_str, b_coverage_str, b_qlen, b_length = best_b_vs_a[b] if a != a2: - #Not an RBH + # Not an RBH continue - #Start with IDs, lengths, coverage + # Start with IDs, lengths, coverage values = [a, b, a_qlen, b_qlen, a_coverage_str, b_coverage_str] - #Alignment length was an integer so don't care about original string + # Alignment length was an integer so don't care about original string values.append(min(a_length, b_length)) - #Output the original string versions of the scores + # Output the original string versions of the scores if float(a_identity_str) < float(b_identity_str): values.append(a_identity_str) else: @@ -330,9 
+364,9 @@ outfile.write("%s\t%s\t%i\t%i\t%s\t%s\t%i\t%s\t%s\n" % tuple(values)) count += 1 outfile.close() -print "Done, %i RBH found" % count +print("Done, %i RBH found" % count) if tie_warning: sys.stderr.write("Warning: Sequences with tied best hits found, you may have duplicates/clusters\n") -#Remove temp files... +# Remove temp files... shutil.rmtree(base_path)
--- a/tools/blast_rbh/blast_rbh.xml Mon Sep 07 04:40:51 2015 -0400 +++ b/tools/blast_rbh/blast_rbh.xml Wed Apr 19 07:44:47 2017 -0400 @@ -1,12 +1,8 @@ -<tool id="blast_reciprocal_best_hits" name="BLAST Reciprocal Best Hits (RBH)" version="0.1.8"> +<tool id="blast_reciprocal_best_hits" name="BLAST Reciprocal Best Hits (RBH)" version="0.1.11"> <description>from two FASTA files</description> <requirements> - <requirement type="package" version="1.64">biopython</requirement> - <requirement type="python-module">Bio</requirement> - <requirement type="binary">makeblastdb</requirement> - <requirement type="binary">blastp</requirement> - <requirement type="binary">blastn</requirement> - <requirement type="package" version="2.2.31">blast+</requirement> + <requirement type="package" version="1.67">biopython</requirement> + <requirement type="package" version="2.5.0">blast</requirement> </requirements> <stdio> <!-- Anything other than zero is an error --> @@ -33,10 +29,10 @@ <!-- Galaxy does not have sub-types for protein vs nucletide FASTA --> <param name="fasta_a" type="data" format="fasta" label="Genes/proteins from species A" - description="FASTA file, one sequence per gene/protein." /> + help="FASTA file, one sequence per gene/protein." /> <param name="fasta_b" type="data" format="fasta" label="Genes/proteins from species B" - description="FASTA file, one sequence per gene/protein." /> + help="FASTA file, one sequence per gene/protein." /> <conditional name="seq"> <param name="dbtype" type="select" label="Molecule type of FASTA inputs"> <option value="prot">protein</option>
--- a/tools/blast_rbh/tool_dependencies.xml Mon Sep 07 04:40:51 2015 -0400 +++ b/tools/blast_rbh/tool_dependencies.xml Wed Apr 19 07:44:47 2017 -0400 @@ -1,9 +1,9 @@ <?xml version="1.0"?> <tool_dependency> - <package name="biopython" version="1.64"> - <repository changeset_revision="5477a05cc158" name="package_biopython_1_64" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" /> + <package name="biopython" version="1.67"> + <repository changeset_revision="a42f244cce44" name="package_biopython_1_67" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> - <package name="blast+" version="2.2.31"> - <repository changeset_revision="e36f75574aec" name="package_blast_plus_2_2_31" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + <package name="blast" version="2.5.0"> + <repository changeset_revision="5dd2b68c7d04" name="package_blast_plus_2_5_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency>