# HG changeset patch # User peterjc # Date 1386244559 18000 # Node ID 4c4a0da938ffcaa86e7efd1a48c3fac0b7cd5a12 # Parent 70e7dcbf6573e5f4d9f5096b0de7b5b4e5998935 Uploaded v0.0.22, now wraps BLAST+ 2.2.28 allowing extended tabular output to include the hit descriptions as column 25. Supports $GALAXY_SLOTS. Includes more tests and heavy use of macros. diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastn_arabidopsis.extended.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_arabidopsis.extended.tabular Thu Dec 05 06:55:59 2013 -0500 @@ -0,0 +1,1 @@ +chunk_of_plant chrIII 100.00 630 0 0 1 630 4341 4970 0.0 1164 chrIII 630 630 630 0 100.00 1 1 GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT 630 23459830 gi|240255695|ref|NC_003074.8| Arabidopsis thaliana 
chromosome 3, complete sequence diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastn_arabidopsis.standard.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_arabidopsis.standard.tabular Thu Dec 05 06:55:59 2013 -0500 @@ -0,0 +1,1 @@ +chunk_of_plant chrIII 100.00 630 0 0 1 630 4341 4970 0.0 1164 diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastn_arabidopsis.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_arabidopsis.xml Thu Dec 05 06:55:59 2013 -0500 @@ -0,0 +1,71 @@ + + + + blastn + BLASTN 2.2.28+ + Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), "A greedy algorithm for aligning DNA sequences", J Comput Biol 2000; 7(1-2):203-14. + /mnt/galaxy/galaxy-central/database/files/000/dataset_857_files/blastdb + Query_1 + chunk_of_plant + 630 + + + 0.001 + 1 + -2 + 0 + 0 + L;m; + + + + + 1 + Query_1 + chunk_of_plant + 630 + + + 1 + gnl|BL_ORD_ID|2 + chrIII gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence + 2 + 23459830 + + + 1 + 1164.51 + 630 + 0 + 1 + 630 + 4341 + 4970 + 1 + 1 + 630 + 630 + 0 + 630 + GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT + 
GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| + + + + + + + 5 + 119146348 + 26 + 71964315672 + 0.46 + 1.28 + 0.85 + + + + + + diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastn_rhodopsin_vs_three_human.tabular --- a/test-data/blastn_rhodopsin_vs_three_human.tabular Mon Sep 23 06:14:13 2013 -0400 +++ b/test-data/blastn_rhodopsin_vs_three_human.tabular Thu Dec 05 06:55:59 2013 -0500 @@ -1,7 +1,7 @@ -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.10 1050 77 6 1 1047 88 1134 0.0 1474 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 9e-133 460 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 7e-94 
331 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 8e-74 265 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.98 171 10 2 2854 3023 615 784 8e-69 248 -gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.58 962 75 6 1 959 118 1076 0.0 1323 -gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.55 1052 121 10 1 1047 88 1134 0.0 1208 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 8e-69 248 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208 diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastp_four_human_vs_rhodopsin.xml --- a/test-data/blastp_four_human_vs_rhodopsin.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/test-data/blastp_four_human_vs_rhodopsin.xml Thu Dec 05 06:55:59 2013 -0500 @@ -2,7 +2,7 @@ blastp - BLASTP 2.2.26+ + BLASTP 2.2.28+ Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402. 
sp|Q9BS26|ERP44_HUMAN @@ -17,630 +17,649 @@ F - - - 1 - sp|Q9BS26|ERP44_HUMAN - Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 - 406 - - - - 0 - 0 - 30 - 119568 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 2 - sp|Q9BS26|ERP44_HUMAN - Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 - 406 - - - - 0 - 0 - 30 - 119568 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 3 - sp|Q9BS26|ERP44_HUMAN - Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 - 406 - - - - 0 - 0 - 30 - 119568 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 4 - sp|Q9BS26|ERP44_HUMAN - Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 - 406 - - - - 0 - 0 - 30 - 119568 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 5 - sp|Q9BS26|ERP44_HUMAN - Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 - 406 - - - - 0 - 0 - 30 - 119568 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 6 - sp|Q9BS26|ERP44_HUMAN - Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 - 406 - - - - 0 - 0 - 30 - 119568 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 7 - sp|Q9NSY1|BMP2K_HUMAN - BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 - 1161 - - - - 0 - 0 - 38 - 348130 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 8 - sp|Q9NSY1|BMP2K_HUMAN - BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 - 1161 - - - - 0 - 0 - 38 - 348130 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 9 - sp|Q9NSY1|BMP2K_HUMAN - BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 - 1161 - - - - 0 - 0 - 38 - 348130 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 10 - sp|Q9NSY1|BMP2K_HUMAN - BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 - 1161 - - - - 0 - 0 - 38 - 348130 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 11 - sp|Q9NSY1|BMP2K_HUMAN - BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 - 1161 - - 
- - 0 - 0 - 38 - 348130 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 12 - sp|Q9NSY1|BMP2K_HUMAN - BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 - 1161 - - - - 0 - 0 - 38 - 348130 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 13 - sp|P06213|INSR_HUMAN - Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 - 1382 - - - - 0 - 0 - 39 - 414987 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 14 - sp|P06213|INSR_HUMAN - Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 - 1382 - - - - 0 - 0 - 39 - 414987 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 15 - sp|P06213|INSR_HUMAN - Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 - 1382 - - - - 0 - 0 - 39 - 414987 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 16 - sp|P06213|INSR_HUMAN - Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 - 1382 - - - - 0 - 0 - 39 - 414987 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 17 - sp|P06213|INSR_HUMAN - Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 - 1382 - - - - 0 - 0 - 39 - 414987 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 18 - sp|P06213|INSR_HUMAN - Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 - 1382 - - - - 0 - 0 - 39 - 414987 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 19 - sp|P08100|OPSD_HUMAN - Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 - 348 - - - 1 - gi|57163783|ref|NP_001009242.1| - rhodopsin [Felis catus] - NP_001009242 - 348 - - - 1 - 701.049 - 1808 - 0 - 1 - 348 - 1 - 348 - 0 - 0 - 336 - 343 - 0 - 348 - MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA - 
MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA - MNGTEGPNFYVPFSN TGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMT+PAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T SKTETSQVAPA - - - - - - - 0 - 0 - 29 - 101761 - 0.041 - 0.267 - 0.14 - - - - - 20 - sp|P08100|OPSD_HUMAN - Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 - 348 - - - 1 - gi|3024260|sp|P56514.1|OPSD_BUFBU - RecName: Full=Rhodopsin - P56514 - 354 - - - 1 - 619.002 - 1595 - 0 - 1 - 341 - 1 - 342 - 0 - 0 - 290 - 322 - 1 - 342 - MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE - MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE - MNGTEGPNFY+P SN TGVVRSPFEYPQYYLAEPWQ+S+L AYMFLLI+LGFPINF+TLYVT+QHKKLRTPLNYILLNLA A+ FMVL GFT T+Y+S++GYF+ G TGC +EGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRF ENHA+MGVAFTW+MAL+CA PPL GWSRYIPEG+QCSCG+DYYTLKPEVNNESFVIYMFVVHFTIP+IIIFFCYG+LV TVKEAAAQQQESATTQKAEKEVTRMVIIMV+ FLICWVPYASVAF+IF++QGS 
FGPIFMT+PAFFAKS++IYNPVIYIM+NKQFRNCM+TT+CCGKNP G+D+A SA SKTE - - - - - - - 0 - 0 - 29 - 101761 - 0.041 - 0.267 - 0.14 - - - - - 21 - sp|P08100|OPSD_HUMAN - Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 - 348 - - - 1 - gi|283855846|gb|ADB45242.1| - rhodopsin [Cynopterus brachyotis] - ADB45242 - 328 - - - 1 - 653.284 - 1684 - 0 - 11 - 338 - 1 - 328 - 0 - 0 - 311 - 321 - 0 - 328 - VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS - VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS - VPFSN TGVVRSPFE+PQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMG+A TWVMALACAAPPL GWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGSNFGPIFMT+PAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T S - - - - - - - 0 - 0 - 29 - 101761 - 0.041 - 0.267 - 0.14 - - - - - 22 - sp|P08100|OPSD_HUMAN - Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 - 348 - - - 1 - gi|283855823|gb|ADB45229.1| - rhodopsin [Myotis pilosus] - ADB45229 - 328 - - - 1 - 631.328 - 1627 - 0 - 11 - 338 - 1 - 328 - 0 - 0 - 311 - 323 - 0 - 328 - 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS - VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS - VPFSN TGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVA+LFMV GGFT+TLYTS+HGYFVFG TGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMG+AFTWVMALACAAPPLAGWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMV+AFLICW+PYASVAFYIFTHQGSNFGP+FMTIPAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T S - - - - - - - 0 - 0 - 29 - 101761 - 0.041 - 0.267 - 0.14 - - - - - 23 - sp|P08100|OPSD_HUMAN - Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 - 348 - - - 1 - gi|223523|prf||0811197A - rhodopsin [Bos taurus] - 0811197A - 347 - - - 1 - 673.315 - 1736 - 0 - 1 - 348 - 1 - 347 - 0 - 0 - 324 - 336 - 1 - 348 - MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA - MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA - 
MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGID YT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA - - - - - - - 0 - 0 - 29 - 101761 - 0.041 - 0.267 - 0.14 - - - - - 24 - sp|P08100|OPSD_HUMAN - Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 - 348 - - - 1 - gi|12583665|dbj|BAB21486.1| - fresh water form rod opsin [Conger myriaster] - BAB21486 - 354 - - - 1 - 599.356 - 1544 - 0 - 1 - 341 - 1 - 342 - 0 - 0 - 281 - 314 - 1 - 342 - MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE - MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE - MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV TW MALACA PPL GWSRYIPEGLQCSCGIDYYT P +NNESFVIYMF HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP +D ASAT SKTE - - - - - - - 0 - 0 - 29 - 101761 - 0.041 - 0.267 - 0.14 - - - - - \ No newline at end of file + + + 1 + sp|Q9BS26|ERP44_HUMAN + Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 + 406 + + + + + 0 + 0 + 30 + 119568 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 2 + 
sp|Q9BS26|ERP44_HUMAN + Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 + 406 + + + + + 0 + 0 + 30 + 119568 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 3 + sp|Q9BS26|ERP44_HUMAN + Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 + 406 + + + + + 0 + 0 + 30 + 119568 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 4 + sp|Q9BS26|ERP44_HUMAN + Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 + 406 + + + + + 0 + 0 + 30 + 119568 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 5 + sp|Q9BS26|ERP44_HUMAN + Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 + 406 + + + + + 0 + 0 + 30 + 119568 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 6 + sp|Q9BS26|ERP44_HUMAN + Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 + 406 + + + + + 0 + 0 + 30 + 119568 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 7 + sp|Q9NSY1|BMP2K_HUMAN + BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 + 1161 + + + + + 0 + 0 + 38 + 348130 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 8 + sp|Q9NSY1|BMP2K_HUMAN + BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 + 1161 + + + + + 0 + 0 + 38 + 348130 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 9 + sp|Q9NSY1|BMP2K_HUMAN + BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 + 1161 + + + + + 0 + 0 + 38 + 348130 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 10 + sp|Q9NSY1|BMP2K_HUMAN + BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 + 1161 + + + + + 0 + 0 + 38 + 348130 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 11 + sp|Q9NSY1|BMP2K_HUMAN + BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 + 1161 + + + + + 0 + 0 + 38 + 348130 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 12 + sp|Q9NSY1|BMP2K_HUMAN + BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 + 1161 + + + + + 0 + 0 + 38 + 348130 + 0.041 + 0.267 + 
0.14 + + + No hits found + + + 13 + sp|P06213|INSR_HUMAN + Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 + 1382 + + + + + 0 + 0 + 39 + 414987 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 14 + sp|P06213|INSR_HUMAN + Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 + 1382 + + + + + 0 + 0 + 39 + 414987 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 15 + sp|P06213|INSR_HUMAN + Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 + 1382 + + + + + 0 + 0 + 39 + 414987 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 16 + sp|P06213|INSR_HUMAN + Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 + 1382 + + + + + 0 + 0 + 39 + 414987 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 17 + sp|P06213|INSR_HUMAN + Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 + 1382 + + + + + 0 + 0 + 39 + 414987 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 18 + sp|P06213|INSR_HUMAN + Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 + 1382 + + + + + 0 + 0 + 39 + 414987 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 19 + sp|P08100|OPSD_HUMAN + Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 + 348 + + + 1 + gi|57163783|ref|NP_001009242.1| + rhodopsin [Felis catus] + NP_001009242 + 348 + + + 1 + 701.049 + 1808 + 0 + 1 + 348 + 1 + 348 + 0 + 0 + 336 + 343 + 0 + 348 + MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA + MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA + MNGTEGPNFYVPFSN 
TGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMT+PAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T SKTETSQVAPA + + + + + + + 0 + 0 + 29 + 101761 + 0.041 + 0.267 + 0.14 + + + + + 20 + sp|P08100|OPSD_HUMAN + Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 + 348 + + + 1 + gi|3024260|sp|P56514.1|OPSD_BUFBU + RecName: Full=Rhodopsin + P56514 + 354 + + + 1 + 619.002 + 1595 + 0 + 1 + 341 + 1 + 342 + 0 + 0 + 290 + 322 + 1 + 342 + MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE + MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE + MNGTEGPNFY+P SN TGVVRSPFEYPQYYLAEPWQ+S+L AYMFLLI+LGFPINF+TLYVT+QHKKLRTPLNYILLNLA A+ FMVL GFT T+Y+S++GYF+ G TGC +EGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRF ENHA+MGVAFTW+MAL+CA PPL GWSRYIPEG+QCSCG+DYYTLKPEVNNESFVIYMFVVHFTIP+IIIFFCYG+LV TVKEAAAQQQESATTQKAEKEVTRMVIIMV+ FLICWVPYASVAF+IF++QGS FGPIFMT+PAFFAKS++IYNPVIYIM+NKQFRNCM+TT+CCGKNP G+D+A SA SKTE + + + + + + + 0 + 0 + 29 + 101761 + 0.041 + 0.267 + 0.14 + + + + + 21 + sp|P08100|OPSD_HUMAN + Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 + 348 + + + 1 + gi|283855846|gb|ADB45242.1| + rhodopsin [Cynopterus brachyotis] + ADB45242 + 328 + + + 1 + 653.284 + 1684 + 0 + 11 + 338 + 1 + 328 + 0 + 0 + 311 + 321 + 0 + 328 + 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS + VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS + VPFSN TGVVRSPFE+PQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMG+A TWVMALACAAPPL GWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGSNFGPIFMT+PAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T S + + + + + + + 0 + 0 + 29 + 101761 + 0.041 + 0.267 + 0.14 + + + + + 22 + sp|P08100|OPSD_HUMAN + Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 + 348 + + + 1 + gi|283855823|gb|ADB45229.1| + rhodopsin [Myotis pilosus] + ADB45229 + 328 + + + 1 + 631.328 + 1627 + 0 + 11 + 338 + 1 + 328 + 0 + 0 + 311 + 323 + 0 + 328 + VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS + VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS + VPFSN 
TGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVA+LFMV GGFT+TLYTS+HGYFVFG TGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMG+AFTWVMALACAAPPLAGWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMV+AFLICW+PYASVAFYIFTHQGSNFGP+FMTIPAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T S + + + + + + + 0 + 0 + 29 + 101761 + 0.041 + 0.267 + 0.14 + + + + + 23 + sp|P08100|OPSD_HUMAN + Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 + 348 + + + 1 + gi|223523|prf||0811197A + rhodopsin [Bos taurus] + 0811197A + 347 + + + 1 + 673.315 + 1736 + 0 + 1 + 348 + 1 + 347 + 0 + 0 + 324 + 336 + 1 + 348 + MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA + MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA + MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGID YT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA + + + + + + + 0 + 0 + 29 + 101761 + 0.041 + 0.267 + 0.14 + + + + + 24 + sp|P08100|OPSD_HUMAN + Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 + 348 + + + 1 + gi|12583665|dbj|BAB21486.1| + fresh water form rod opsin [Conger myriaster] + BAB21486 + 354 + + + 1 + 599.356 + 1544 + 0 + 1 + 341 + 1 + 342 + 0 + 0 + 281 + 314 + 1 + 342 
+ MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE + MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE + MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV TW MALACA PPL GWSRYIPEGLQCSCGIDYYT P +NNESFVIYMF HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP +D ASAT SKTE + + + + + + + 0 + 0 + 29 + 101761 + 0.041 + 0.267 + 0.14 + + + + + + diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular --- a/test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular Mon Sep 23 06:14:13 2013 -0400 +++ b/test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular Thu Dec 05 06:55:59 2013 -0500 @@ -1,6 +1,6 @@ -sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 
MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 -sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 -sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 
348 328 -sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 -sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 -sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 +sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 rhodopsin [Felis catus] +sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 RecName: Full=Rhodopsin +sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 rhodopsin [Cynopterus brachyotis] +sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 rhodopsin [Myotis pilosus] +sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 rhodopsin [Bos taurus] +sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 fresh water form rod opsin [Conger myriaster] diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastp_four_human_vs_rhodopsin_ext.tabular --- a/test-data/blastp_four_human_vs_rhodopsin_ext.tabular Mon Sep 23 06:14:13 2013 -0400 +++ b/test-data/blastp_four_human_vs_rhodopsin_ext.tabular Thu Dec 05 06:55:59 2013 -0500 @@ -1,6 +1,6 @@ -sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 -sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 
gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 -sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 -sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 -sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 -sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE 
MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 +sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 N/A +sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE 
MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 N/A +sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A +sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A +sp|P08100|OPSD_HUMAN 
gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 N/A +sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 N/A diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastp_human_vs_pdb_seg_no_converted_ext.tabular --- a/test-data/blastp_human_vs_pdb_seg_no_converted_ext.tabular Mon Sep 23 06:14:13 2013 -0400 +++ b/test-data/blastp_human_vs_pdb_seg_no_converted_ext.tabular Thu Dec 05 06:55:59 2013 -0500 @@ -1,8 +1,8 @@ -sp|Q9BS26|ERP44_HUMAN 
gi|193885198|pdb|2R2J|A 97.11 381 11 0 26 406 2 382 0.0 768 gi|193885198|pdb|2R2J|A 1982 370 372 0 97.64 1 1 PVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL PLGSEITSLDTENIDEILNNADVALVNFYADWCRFSQXLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGXXXKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDXVYLGAXTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHXKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRHXYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL 406 382 -sp|Q9BS26|ERP44_HUMAN gi|88192228|pdb|2B5E|A 25.17 290 193 8 25 306 10 283 4e-20 95.1 gi|88192228|pdb|2B5E|A;gi|206581884|pdb|3BOA|A 235 73 133 24 45.86 1 1 TPVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKR-EYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNI---IYKPPGHSAPDMVYLGA---MTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKF-RH APEDSAVVKLATDSFNEYIQSHDLVLAEFFAPWCGHCKNMAPEYVKAAETLVEK-----NITLAQIDCTENQDLCMEHNIPGFPSLKIFKNSDVNNSIDYEGPRTAEAIVQFMIKQSQPAVAVVADLPAYLANETFVTPVIVQSGKIDADFNATFYSMANKHFNDYDFVSA--------ENADDDFKLSIYLPSAMDEP-VVYNGKKADIADADVFEKWLQVEALPYFGEIDGSVFAQYVESGLPLGYLFY--NDEEELEEYKPLFTELAKKNRGLMNFVSIDARKFGRH 406 504 -sp|Q9NSY1|BMP2K_HUMAN gi|73536291|pdb|2BUJ|A 29.39 279 182 8 40 308 21 294 1e-22 105 gi|73536291|pdb|2BUJ|A;gi|73536292|pdb|2BUJ|B 262 82 130 15 46.59 1 1 
GVRVFAVGRHQVTLEESLAEGGFSTVFLVR-THGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMNKKLQTG--FTEPEVLQIFCDTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDG-VNVVEEEIKKYTTLSYRAPEMINLYGGKPITTKADIWALGCLLYKLCFFTLPF------GESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDI GHMVIIDNKHYLFIQK-LGEGGFSYVDLVEGLHDGHFYALKRILCHEQQDREEAQREAD-MHRLFNHPNILRLVAYCLRERGAKH-EAWLLLPFFKRGTLWNEIERLKDKGNFLTEDQILWLLLGICRGLEAIH--AKGYAHRDLKPTNILLGDEGQPVLMDLGSMNQACIHVEGSRQALTLQDWAAQRCTISYRAPELFSVQSHCVIDERTDVWSLGCVLYAMMFGEGPYDMVFQKGDSVALAVQNQLSIPQSPRHSSALWQLLNSMMTVDPHQRPHI 1161 317 -sp|Q9NSY1|BMP2K_HUMAN gi|270346335|pdb|2WQM|A 27.21 272 166 12 53 311 36 288 6e-17 86.3 gi|270346335|pdb|2WQM|A;gi|270346336|pdb|2WQN|A 212 74 129 32 47.43 1 1 LEESLAEGGFSTVFLVRTH-GGIRCALKRMYVNNMPDLNV---CKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMN--KKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGGKPITTKADIWALGCLLYKLCFFTLPFGESQV---AICD----GNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDIFQV IEKKIGRGQFSEVYRAACLLDGVPVALKKVQIFDLMDAKARADCIKEIDLLKQLN-HPNVIKYY---ASFIEDN--ELNIVLELADAGDLSRMIKHFKKQKRLIPERTVWKYFVQLCSALEHMHSRR--VMHRDIKPANVFITATGVVKLGDLG--LGRFFSSKTTAAHSL------VGTPYYMSPERIHENG---YNFKSDIWSLGCLLYEMAALQSPFYGDKMNLYSLCKKIEQCDYPPLPSDHYSEELRQLVNMCINPDPEKRPDVTYV 1161 310 -sp|P06213|INSR_HUMAN gi|116667097|pdb|2DTG|E 95.91 928 7 2 28 955 1 897 0.0 1846 gi|116667097|pdb|2DTG|E 4781 890 893 31 96.23 1 1 
HLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECLGNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQERNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWERQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQILKELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAFPNTSSTSVPTSPEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYVSARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCVSRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIA HLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNHIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECLGNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQERNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWERQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQILKELEESSFRKTFEDYLHNVVFV------------PRPSRKRRSLGDVGNA-------------------GNNEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYVSARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCDTRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIA 1382 897 -sp|P06213|INSR_HUMAN gi|114794482|pdb|2HR7|A 99.59 485 2 0 28 512 1 485 0.0 1016 gi|114794482|pdb|2HR7|A;gi|114794483|pdb|2HR7|B 
2628 483 485 0 100.00 1 1 HLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECLGNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQERNDIALKTNGDQASCENELLKFSYIRTSFDKI HLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNHIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECLGNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQERNDIALKTNGDKASCENELLKFSYIRTSFDKI 1382 486 -sp|P08100|OPSD_HUMAN gi|16975387|pdb|1JFP|A 93.39 348 23 0 1 348 1 348 0.0 681 gi|16975387|pdb|1JFP|A;gi|22219255|pdb|1LN6|A;gi|157878065|pdb|1GZM|A;gi|157878066|pdb|1GZM|B;gi|157878298|pdb|1HZX|A;gi|157878299|pdb|1HZX|B;gi|157878979|pdb|1L9H|A;gi|157878980|pdb|1L9H|B;gi|157880263|pdb|1U19|A;gi|157880264|pdb|1U19|B;gi|157883606|pdb|2G87|A;gi|157883607|pdb|2G87|B;gi|157883830|pdb|2HPY|A;gi|157883831|pdb|2HPY|B;gi|157883860|pdb|2I35|A;gi|157883861|pdb|2I36|A;gi|157883862|pdb|2I36|B;gi|157883863|pdb|2I36|C;gi|157883864|pdb|2I37|A;gi|157883865|pdb|2I37|B;gi|157883866|pdb|2I37|C;gi|159795066|pdb|2PED|A;gi|159795067|pdb|2PED|B;gi|192988480|pdb|3CAP|A;gi|192988481|pdb|3CAP|B;gi|195927457|pdb|3C9L|A;gi|197107530|pdb|1F88|A;gi|197107531|pdb|1F88|B;gi|206582030|pdb|3DQB|A 1756 325 337 0 96.84 1 1 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 348 -sp|P08100|OPSD_HUMAN gi|195927458|pdb|3C9M|A 93.10 348 24 0 1 348 1 348 0.0 674 gi|195927458|pdb|3C9M|A 1738 324 335 0 96.26 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MCGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSCFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 348 +sp|Q9BS26|ERP44_HUMAN gi|193885198|pdb|2R2J|A 97.11 381 11 0 26 406 2 382 0.0 768 gi|193885198|pdb|2R2J|A 1982 370 372 0 97.64 1 1 
PVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL PLGSEITSLDTENIDEILNNADVALVNFYADWCRFSQXLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGXXXKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDXVYLGAXTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHXKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRHXYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL 406 382 Chain A, Crystal Structure Of Human Erp44 +sp|Q9BS26|ERP44_HUMAN gi|88192228|pdb|2B5E|A 25.17 290 193 8 25 306 10 283 4e-20 95.1 gi|88192228|pdb|2B5E|A;gi|206581884|pdb|3BOA|A 235 73 133 24 45.86 1 1 TPVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKR-EYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNI---IYKPPGHSAPDMVYLGA---MTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKF-RH APEDSAVVKLATDSFNEYIQSHDLVLAEFFAPWCGHCKNMAPEYVKAAETLVEK-----NITLAQIDCTENQDLCMEHNIPGFPSLKIFKNSDVNNSIDYEGPRTAEAIVQFMIKQSQPAVAVVADLPAYLANETFVTPVIVQSGKIDADFNATFYSMANKHFNDYDFVSA--------ENADDDFKLSIYLPSAMDEP-VVYNGKKADIADADVFEKWLQVEALPYFGEIDGSVFAQYVESGLPLGYLFY--NDEEELEEYKPLFTELAKKNRGLMNFVSIDARKFGRH 406 504 Chain A, Crystal Structure Of Yeast Protein Disulfide Isomerase<>Chain A, Crystal Structure Of Yeast Protein Disulfide Isomerase. 
+sp|Q9NSY1|BMP2K_HUMAN gi|73536291|pdb|2BUJ|A 29.39 279 182 8 40 308 21 294 1e-22 105 gi|73536291|pdb|2BUJ|A;gi|73536292|pdb|2BUJ|B 262 82 130 15 46.59 1 1 GVRVFAVGRHQVTLEESLAEGGFSTVFLVR-THGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMNKKLQTG--FTEPEVLQIFCDTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDG-VNVVEEEIKKYTTLSYRAPEMINLYGGKPITTKADIWALGCLLYKLCFFTLPF------GESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDI GHMVIIDNKHYLFIQK-LGEGGFSYVDLVEGLHDGHFYALKRILCHEQQDREEAQREAD-MHRLFNHPNILRLVAYCLRERGAKH-EAWLLLPFFKRGTLWNEIERLKDKGNFLTEDQILWLLLGICRGLEAIH--AKGYAHRDLKPTNILLGDEGQPVLMDLGSMNQACIHVEGSRQALTLQDWAAQRCTISYRAPELFSVQSHCVIDERTDVWSLGCVLYAMMFGEGPYDMVFQKGDSVALAVQNQLSIPQSPRHSSALWQLLNSMMTVDPHQRPHI 1161 317 Chain A, Crystal Structure Of The Human Serine-Threonine Kinase 16 In Complex With Staurosporine<>Chain B, Crystal Structure Of The Human Serine-Threonine Kinase 16 In Complex With Staurosporine +sp|Q9NSY1|BMP2K_HUMAN gi|270346335|pdb|2WQM|A 27.21 272 166 12 53 311 36 288 6e-17 86.3 gi|270346335|pdb|2WQM|A;gi|270346336|pdb|2WQN|A 212 74 129 32 47.43 1 1 LEESLAEGGFSTVFLVRTH-GGIRCALKRMYVNNMPDLNV---CKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMN--KKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGGKPITTKADIWALGCLLYKLCFFTLPFGESQV---AICD----GNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDIFQV IEKKIGRGQFSEVYRAACLLDGVPVALKKVQIFDLMDAKARADCIKEIDLLKQLN-HPNVIKYY---ASFIEDN--ELNIVLELADAGDLSRMIKHFKKQKRLIPERTVWKYFVQLCSALEHMHSRR--VMHRDIKPANVFITATGVVKLGDLG--LGRFFSSKTTAAHSL------VGTPYYMSPERIHENG---YNFKSDIWSLGCLLYEMAALQSPFYGDKMNLYSLCKKIEQCDYPPLPSDHYSEELRQLVNMCINPDPEKRPDVTYV 1161 310 Chain A, Structure Of Apo Human Nek7<>Chain A, Structure Of Adp-Bound Human Nek7 +sp|P06213|INSR_HUMAN gi|116667097|pdb|2DTG|E 95.91 928 7 2 28 955 1 897 0.0 1846 gi|116667097|pdb|2DTG|E 4781 890 893 31 96.23 1 1 
HLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECLGNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQERNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWERQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQILKELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAFPNTSSTSVPTSPEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYVSARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCVSRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIA HLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNHIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECLGNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQERNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWERQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQILKELEESSFRKTFEDYLHNVVFV------------PRPSRKRRSLGDVGNA-------------------GNNEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYVSARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCDTRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIA 1382 897 Chain E, Insulin Receptor (Ir) Ectodomain In Complex With Fab's +sp|P06213|INSR_HUMAN gi|114794482|pdb|2HR7|A 99.59 485 2 0 28 512 1 
485 0.0 1016 gi|114794482|pdb|2HR7|A;gi|114794483|pdb|2HR7|B 2628 483 485 0 100.00 1 1 HLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECLGNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQERNDIALKTNGDQASCENELLKFSYIRTSFDKI HLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNHIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECLGNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQERNDIALKTNGDKASCENELLKFSYIRTSFDKI 1382 486 Chain A, Insulin Receptor (Domains 1-3)<>Chain B, Insulin Receptor (Domains 1-3) +sp|P08100|OPSD_HUMAN gi|16975387|pdb|1JFP|A 93.39 348 23 0 1 348 1 348 0.0 681 gi|16975387|pdb|1JFP|A;gi|22219255|pdb|1LN6|A;gi|157878065|pdb|1GZM|A;gi|157878066|pdb|1GZM|B;gi|157878298|pdb|1HZX|A;gi|157878299|pdb|1HZX|B;gi|157878979|pdb|1L9H|A;gi|157878980|pdb|1L9H|B;gi|157880263|pdb|1U19|A;gi|157880264|pdb|1U19|B;gi|157883606|pdb|2G87|A;gi|157883607|pdb|2G87|B;gi|157883830|pdb|2HPY|A;gi|157883831|pdb|2HPY|B;gi|157883860|pdb|2I35|A;gi|157883861|pdb|2I36|A;gi|157883862|pdb|2I36|B;gi|157883863|pdb|2I36|C;gi|157883864|pdb|2I37|A;gi|157883865|pdb|2I37|B;gi|157883866|pdb|2I37|C;gi|159795066|pdb|2PED|A;gi|159795067|pdb|2PED|B;gi|192988480|pdb|3CAP|A;gi|192988481|pdb|3CAP|B;gi|195927457|pdb|3C9L|A;gi|197107530|pdb|1F88|A;gi|197107531|pdb|1F88|B;gi|206582030|pdb|3DQB|A 1756 325 337 0 96.84 1 1 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 348 Chain A, Structure Of Bovine Rhodopsin (Dark Adapted)<>Chain A, Structure Of Bovine Rhodopsin (Metarhodopsin Ii)<>Chain A, Structure Of Bovine Rhodopsin In A Trigonal Crystal Form<>Chain B, Structure Of Bovine Rhodopsin In A Trigonal Crystal Form<>Chain A, Crystal Structure Of Bovine Rhodopsin<>Chain B, Crystal Structure Of Bovine Rhodopsin<>Chain A, Crystal Structure Of Bovine Rhodopsin At 2.6 Angstroms Resolution<>Chain B, Crystal Structure Of Bovine Rhodopsin At 2.6 Angstroms Resolution<>Chain A, Crystal Structure Of Bovine Rhodopsin At 2.2 Angstroms Resolution<>Chain B, Crystal Structure Of Bovine Rhodopsin At 2.2 Angstroms Resolution<>Chain A, Crystallographic Model Of Bathorhodopsin<>Chain B, Crystallographic Model Of Bathorhodopsin<>Chain A, Crystallographic Model Of Lumirhodopsin<>Chain B, Crystallographic Model Of Lumirhodopsin<>Chain A, Crystal Structure Of Rhombohedral Crystal Form Of Ground- State Rhodopsin<>Chain A, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain B, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain C, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain A, Crystal Structure Of A Photoactivated Rhodopsin<>Chain B, Crystal Structure Of A Photoactivated Rhodopsin<>Chain 
C, Crystal Structure Of A Photoactivated Rhodopsin<>Chain A, Crystallographic Model Of 9-Cis-Rhodopsin<>Chain B, Crystallographic Model Of 9-Cis-Rhodopsin<>Chain A, Crystal Structure Of Native Opsin: The G Protein-Coupled Receptor Rhodopsin In Its Ligand-Free State<>Chain B, Crystal Structure Of Native Opsin: The G Protein-Coupled Receptor Rhodopsin In Its Ligand-Free State<>Chain A, Structure Of Ground-State Bovine Rhodospin In A Hexagonal Crystal Form<>Chain A, Crystal Structure Of Bovine Rhodopsin<>Chain B, Crystal Structure Of Bovine Rhodopsin<>Chain A, Crystal Structure Of The Active G-Protein-Coupled Receptor Opsin In Complex With A C-Terminal Peptide Derived From The Galpha Subunit Of Transducin +sp|P08100|OPSD_HUMAN gi|195927458|pdb|3C9M|A 93.10 348 24 0 1 348 1 348 0.0 674 gi|195927458|pdb|3C9M|A 1738 324 335 0 96.26 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MCGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSCFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 348 Chain A, Structure Of A Mutant Bovine Rhodopsin In Hexagonal Crystal Form diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastx_rhodopsin_vs_four_human.tabular --- a/test-data/blastx_rhodopsin_vs_four_human.tabular Mon Sep 23 06:14:13 2013 -0400 +++ b/test-data/blastx_rhodopsin_vs_four_human.tabular Thu Dec 05 06:55:59 2013 -0500 @@ -1,10 +1,10 @@ -gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 662 
-gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 575 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 2e-68 224 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 5e-36 129 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.43 56 2 0 2855 3022 177 232 3e-33 120 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 2e-32 118 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.00 25 2 0 4222 4296 312 336 2e-12 56.2 -gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 613 -gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 641 -gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.93 332 60 0 23 1018 1 332 0.0 559 +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastx_rhodopsin_vs_four_human.xml --- a/test-data/blastx_rhodopsin_vs_four_human.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/test-data/blastx_rhodopsin_vs_four_human.xml Thu Dec 05 06:55:59 2013 -0500 @@ -2,7 +2,7 @@ blastx - BLASTX 2.2.26+ + BLASTX 2.2.28+ 
Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402. Query_1 @@ -17,706 +17,725 @@ L; - - - 1 - Query_1 - gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA - 1047 - - - - 0 - 0 - 29 - 102080 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 2 - Query_1 - gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA - 1047 - - - - 0 - 0 - 29 - 102080 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 3 - Query_1 - gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA - 1047 - - - - 0 - 0 - 29 - 102080 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 4 - Query_1 - gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA - 1047 - - - 1 - Subject_4 - sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 - Subject_4 - 348 - - - 1 - 662.144 - 1707 - 0 - 1 - 1044 - 1 - 348 - 1 - 0 - 336 - 343 - 0 - 348 - MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA - MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA - MNGTEGPNFYVPFSN TGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL 
GWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMT+PAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T SKTETSQVAPA - - - - - - - 0 - 0 - 29 - 102080 - 0.041 - 0.267 - 0.14 - - - - - 5 - Query_2 - gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds - 1574 - - - - 0 - 0 - 32 - 155472 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 6 - Query_2 - gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds - 1574 - - - - 0 - 0 - 32 - 155472 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 7 - Query_2 - gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds - 1574 - - - - 0 - 0 - 32 - 155472 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 8 - Query_2 - gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds - 1574 - - - 1 - Subject_4 - sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 - Subject_4 - 348 - - - 1 - 575.089 - 1481 - 0 - 42 - 1037 - 1 - 332 - 3 - 0 - 283 - 315 - 0 - 332 - MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD - MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE - MNGTEGPNFY+P SN TGVVRSPFEYPQYYLAEPWQ+S+L AYMFLLI+LGFPINF+TLYVT+QHKKLRTPLNYILLNLA A+ FMVL GFT T+Y+S++GYF+ G TGC +EGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRF ENHA+MGVAFTW+MAL+CA PPL GWSRYIPEG+QCSCG+DYYTLKPEVNNESFVIYMFVVHFTIP+IIIFFCYG+LV TVKEAAAQQQESATTQKAEKEVTRMVIIMV+ FLICWVPYASVAF+IF++QGS 
FGPIFMT+PAFFAKS++IYNPVIYIM+NKQFRNCM+TT+CCGKNP G+D+ - - - - - - - 0 - 0 - 32 - 155472 - 0.041 - 0.267 - 0.14 - - - - - 9 - Query_3 - gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds - 4301 - - - - 0 - 0 - 39 - 430746 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 10 - Query_3 - gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds - 4301 - - - - 0 - 0 - 39 - 430746 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 11 - Query_3 - gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds - 4301 - - - - 0 - 0 - 39 - 430746 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 12 - Query_3 - gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds - 4301 - - - 1 - Subject_4 - sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 - Subject_4 - 348 - - - 1 - 224.172 - 570 - 1.84906e-68 - 1 - 333 - 11 - 121 - 1 - 0 - 107 - 109 - 0 - 111 - VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG - VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG - VPFSN TGVVRSPFE+PQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGG - - - 2 - 129.413 - 324 - 4.62305e-36 - 3174 - 3368 - 248 - 312 - 3 - 0 - 60 - 64 - 0 - 65 - KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ - KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ - KEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGSNFGPIFMT+PAFFAKS++IYNPVIYIMMNKQ - - - 3 - 120.939 - 302 - 2.85192e-33 - 2855 - 3022 - 177 - 232 - 2 - 0 - 54 - 56 - 0 - 56 - RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKE - RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKE - 
RYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKE - - - 4 - 118.242 - 295 - 2.16231e-32 - 1404 - 1580 - 119 - 177 - 3 - 0 - 55 - 56 - 0 - 59 - LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR - LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR - L GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMG+A TWVMALACAAPPL GWSR - - - 5 - 56.225 - 134 - 2.05678e-12 - 4222 - 4296 - 312 - 336 - 1 - 0 - 23 - 24 - 0 - 25 - QFRNCMLTTLCCGKNPLGDDEASTT - QFRNCMLTTICCGKNPLGDDEASAT - QFRNCMLTT+CCGKNPLGDDEAS T - - - - - - - 0 - 0 - 39 - 430746 - 0.041 - 0.267 - 0.14 - - - - - 13 - Query_4 - gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds - 983 - - - - 0 - 0 - 28 - 95680 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 14 - Query_4 - gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds - 983 - - - - 0 - 0 - 28 - 95680 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 15 - Query_4 - gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds - 983 - - - - 0 - 0 - 28 - 95680 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 16 - Query_4 - gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds - 983 - - - 1 - Subject_4 - sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 - Subject_4 - 348 - - - 1 - 613.994 - 1582 - 0 - 1 - 978 - 11 - 336 - 1 - 0 - 310 - 322 - 0 - 326 - VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT - 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT - VPFSN TGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVA+LFMV GGFT+TLYTS+HGYFVFG TGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMG+AFTWVMALACAAPPLAGWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMV+AFLICW+PYASVAFYIFTHQGSNFGP+FMTIPAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T - - - - - - - 0 - 0 - 28 - 95680 - 0.041 - 0.267 - 0.14 - - - - - 17 - Query_5 - gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds - 1047 - - - - 0 - 0 - 29 - 102080 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 18 - Query_5 - gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds - 1047 - - - - 0 - 0 - 29 - 102080 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 19 - Query_5 - gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds - 1047 - - - - 0 - 0 - 29 - 102080 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 20 - Query_5 - gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds - 1047 - - - 1 - Subject_4 - sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 - Subject_4 - 348 - - - 1 - 641.728 - 1654 - 0 - 1 - 1044 - 1 - 348 - 1 - 0 - 325 - 337 - 0 - 348 - MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA - 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA - MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA - - - - - - - 0 - 0 - 29 - 102080 - 0.041 - 0.267 - 0.14 - - - - - 21 - Query_6 - gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds - 1344 - - - - 0 - 0 - 31 - 132189 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 22 - Query_6 - gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds - 1344 - - - - 0 - 0 - 31 - 132189 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 23 - Query_6 - gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds - 1344 - - - - 0 - 0 - 31 - 132189 - 0.041 - 0.267 - 0.14 - - - No hits found - - - 24 - Query_6 - gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds - 1344 - - - 1 - Subject_4 - sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 - Subject_4 - 348 - - - 1 - 559.296 - 1440 - 0 - 23 - 1018 - 1 - 332 - 2 - 0 - 272 - 307 - 0 - 332 - 
MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEED - MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE - MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV TW MALACA PPL GWSRYIPEGLQCSCGIDYYT P +NNESFVIYMF HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP +++ - - - - - - - 0 - 0 - 31 - 132189 - 0.041 - 0.267 - 0.14 - - - - - \ No newline at end of file + + + 1 + Query_1 + gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA + 1047 + + + + + 0 + 0 + 30 + 119944 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 2 + Query_1 + gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA + 1047 + + + + + 0 + 0 + 30 + 119944 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 3 + Query_1 + gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA + 1047 + + + + + 0 + 0 + 30 + 119944 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 4 + Query_1 + gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA + 1047 + + + 1 + Subject_4 + sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 + Subject_4 + 348 + + + 1 + 639.032 + 1647 + 0 + 1 + 1044 + 1 + 348 + 1 + 0 + 336 + 343 + 0 + 348 + 
MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA + MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA + MNGTEGPNFYVPFSN TGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMT+PAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T SKTETSQVAPA + + + + + + + 0 + 0 + 30 + 119944 + 0.041 + 0.267 + 0.14 + + + + + 5 + Query_2 + gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds + 1574 + + + + + 0 + 0 + 33 + 183143 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 6 + Query_2 + gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds + 1574 + + + + + 0 + 0 + 33 + 183143 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 7 + Query_2 + gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds + 1574 + + + + + 0 + 0 + 33 + 183143 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 8 + Query_2 + gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds + 1574 + + + 1 + Subject_4 + sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 + Subject_4 + 348 + + + 1 + 551.206 + 1419 + 0 + 42 + 1037 + 1 + 332 + 3 + 0 + 283 + 315 + 0 + 332 + 
MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD + MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE + MNGTEGPNFY+P SN TGVVRSPFEYPQYYLAEPWQ+S+L AYMFLLI+LGFPINF+TLYVT+QHKKLRTPLNYILLNLA A+ FMVL GFT T+Y+S++GYF+ G TGC +EGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRF ENHA+MGVAFTW+MAL+CA PPL GWSRYIPEG+QCSCG+DYYTLKPEVNNESFVIYMFVVHFTIP+IIIFFCYG+LV TVKEAAAQQQESATTQKAEKEVTRMVIIMV+ FLICWVPYASVAF+IF++QGS FGPIFMT+PAFFAKS++IYNPVIYIM+NKQFRNCM+TT+CCGKNP G+D+ + + + + + + + 0 + 0 + 33 + 183143 + 0.041 + 0.267 + 0.14 + + + + + 9 + Query_3 + gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds + 4301 + + + + + 0 + 0 + 40 + 509838 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 10 + Query_3 + gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds + 4301 + + + + + 0 + 0 + 40 + 509838 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 11 + Query_3 + gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds + 4301 + + + + + 0 + 0 + 40 + 509838 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 12 + Query_3 + gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds + 4301 + + + 1 + Subject_4 + sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 + Subject_4 + 348 + + + 1 + 
220.32 + 560 + 4.29169e-67 + 1 + 333 + 11 + 121 + 1 + 0 + 107 + 109 + 0 + 111 + VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG + VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG + VPFSN TGVVRSPFE+PQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGG + + + 2 + 127.487 + 319 + 1.95079e-35 + 3174 + 3368 + 248 + 312 + 3 + 0 + 60 + 64 + 0 + 65 + KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ + KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ + KEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGSNFGPIFMT+PAFFAKS++IYNPVIYIMMNKQ + + + 3 + 121.324 + 303 + 1.96633e-33 + 2855 + 3031 + 177 + 235 + 2 + 0 + 54 + 57 + 0 + 59 + RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS + RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA + RYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKE + + + + 4 + 97.0561 + 240 + 1.16473e-25 + 1404 + 1580 + 119 + 177 + 3 + 0 + 55 + 56 + 0 + 59 + LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR + LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR + L GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMG+A TWVMALACAAPPL GWSR + + + 5 + 56.9954 + 136 + 1.23645e-12 + 4222 + 4299 + 312 + 337 + 1 + 0 + 23 + 24 + 0 + 26 + QFRNCMLTTLCCGKNPLGDDEASTTA + QFRNCMLTTICCGKNPLGDDEASATV + QFRNCMLTT+CCGKNPLGDDEAS T + + + + + + + 0 + 0 + 40 + 509838 + 0.041 + 0.267 + 0.14 + + + + + 13 + Query_4 + gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds + 983 + + + + + 0 + 0 + 29 + 112346 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 14 + Query_4 + gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds + 983 + + + + + 0 + 0 + 29 + 112346 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 15 + Query_4 + gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial 
cds + 983 + + + + + 0 + 0 + 29 + 112346 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 16 + Query_4 + gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds + 983 + + + 1 + Subject_4 + sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 + Subject_4 + 348 + + + 1 + 589.341 + 1518 + 0 + 1 + 978 + 11 + 336 + 1 + 0 + 310 + 322 + 0 + 326 + VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT + VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT + VPFSN TGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVA+LFMV GGFT+TLYTS+HGYFVFG TGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMG+AFTWVMALACAAPPLAGWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMV+AFLICW+PYASVAFYIFTHQGSNFGP+FMTIPAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T + + + + + + + 0 + 0 + 29 + 112346 + 0.041 + 0.267 + 0.14 + + + + + 17 + Query_5 + gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds + 1047 + + + + + 0 + 0 + 30 + 119944 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 18 + Query_5 + gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds + 1047 + + + + + 0 + 0 + 30 + 119944 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 19 + Query_5 + gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds + 1047 + + + + + 0 + 0 + 30 + 119944 + 0.041 + 0.267 + 0.14 + + + 
No hits found + + + 20 + Query_5 + gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds + 1047 + + + 1 + Subject_4 + sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 + Subject_4 + 348 + + + 1 + 619.387 + 1596 + 0 + 1 + 1044 + 1 + 348 + 1 + 0 + 325 + 337 + 0 + 348 + MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA + MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA + MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYT E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA + + + + + + + 0 + 0 + 30 + 119944 + 0.041 + 0.267 + 0.14 + + + + + 21 + Query_6 + gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds + 1344 + + + + + 0 + 0 + 32 + 155584 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 22 + Query_6 + gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds + 1344 + + + + + 0 + 0 + 32 + 155584 + 0.041 + 0.267 + 0.14 + + + No hits found + + + 23 + Query_6 + gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds + 1344 + + + + + 0 + 0 + 32 + 155584 + 0.041 + 
0.267 + 0.14 + + + No hits found + + + 24 + Query_6 + gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds + 1344 + + + 1 + Subject_4 + sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 + Subject_4 + 348 + + + 1 + 532.717 + 1371 + 0 + 23 + 1021 + 1 + 333 + 2 + 0 + 272 + 307 + 0 + 333 + MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG + MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA + MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV TW MALACA PPL GWSRYIPEGLQCSCGIDYYT P +NNESFVIYMF HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP +++ + + + + + + + 0 + 0 + 32 + 155584 + 0.041 + 0.267 + 0.14 + + + + + + diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastx_rhodopsin_vs_four_human_converted.tabular --- a/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Mon Sep 23 06:14:13 2013 -0400 +++ b/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Thu Dec 05 06:55:59 2013 -0500 @@ -1,10 +1,10 @@ -gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 662 -gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 575 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 2e-68 224 
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 5e-36 129 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.43 56 2 0 2855 3022 177 232 3e-33 120 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 2e-32 118 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.00 25 2 0 4222 4296 312 336 2e-12 56.2 -gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 613 -gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 641 -gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.93 332 60 0 23 1018 1 332 0.0 559 +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular --- a/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Mon Sep 23 06:14:13 2013 -0400 +++ b/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Thu Dec 05 06:55:59 2013 -0500 @@ -1,10 +1,10 @@ -gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 662 sp|P08100|OPSD_HUMAN 1707 336 343 0 98.56 1 0 
MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 -gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 575 sp|P08100|OPSD_HUMAN 1481 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 2e-68 224 sp|P08100|OPSD_HUMAN 570 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 5e-36 129 sp|P08100|OPSD_HUMAN 324 
60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.43 56 2 0 2855 3022 177 232 3e-33 120 sp|P08100|OPSD_HUMAN 302 54 56 0 100.00 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKE RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKE 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 2e-32 118 sp|P08100|OPSD_HUMAN 295 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.00 25 2 0 4222 4296 312 336 2e-12 56.2 sp|P08100|OPSD_HUMAN 134 23 24 0 96.00 1 0 QFRNCMLTTLCCGKNPLGDDEASTT QFRNCMLTTICCGKNPLGDDEASAT 4301 348 -gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 613 sp|P08100|OPSD_HUMAN 1582 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 -gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 641 sp|P08100|OPSD_HUMAN 1654 325 337 0 96.84 1 0 
MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 -gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.93 332 60 0 23 1018 1 332 0.0 559 sp|P08100|OPSD_HUMAN 1440 272 307 0 92.47 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEED MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1344 348 +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 sp|P08100|OPSD_HUMAN 560 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 303 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 
MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastx_rhodopsin_vs_four_human_ext.tabular --- a/test-data/blastx_rhodopsin_vs_four_human_ext.tabular Mon Sep 23 06:14:13 2013 -0400 +++ b/test-data/blastx_rhodopsin_vs_four_human_ext.tabular Thu Dec 05 06:55:59 2013 -0500 @@ -1,10 +1,10 @@ -gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 662 
sp|P08100|OPSD_HUMAN 1707 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 -gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 575 sp|P08100|OPSD_HUMAN 1481 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 2e-68 224 sp|P08100|OPSD_HUMAN 570 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 
3368 248 312 5e-36 129 sp|P08100|OPSD_HUMAN 324 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.43 56 2 0 2855 3022 177 232 3e-33 120 sp|P08100|OPSD_HUMAN 302 54 56 0 100.00 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKE RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKE 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 2e-32 118 sp|P08100|OPSD_HUMAN 295 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.00 25 2 0 4222 4296 312 336 2e-12 56.2 sp|P08100|OPSD_HUMAN 134 23 24 0 96.00 1 0 QFRNCMLTTLCCGKNPLGDDEASTT QFRNCMLTTICCGKNPLGDDEASAT 4301 348 -gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 613 sp|P08100|OPSD_HUMAN 1582 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 -gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 641 sp|P08100|OPSD_HUMAN 1654 325 337 0 96.84 1 0 
MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 -gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.93 332 60 0 23 1018 1 332 0.0 559 sp|P08100|OPSD_HUMAN 1440 272 307 0 92.47 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEED MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1344 348 +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 sp|P08100|OPSD_HUMAN 560 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 303 54 57 0 96.61 2 0 
RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 N/A +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 N/A +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 N/A diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/dustmasker_three_human.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dustmasker_three_human.fasta Thu Dec 05 06:55:59 2013 -0500 @@ -0,0 +1,183 @@ +>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds +GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAG +CCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCAC +GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC +CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA +TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC +TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT +TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA +CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA 
+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA +CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC +TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA +TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT +TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG +GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA +TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA +ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG +TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA +CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC +AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT +CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA +ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC +CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA +TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG +CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC +TATGTGTatttttattttgaataaacagaaagaaattttgggtttttaatttttttCTCC +CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTaaaaaaaaaaaaaCCTGCTA +GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT +ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA +GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT +CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT +GGGAGGGGAATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA +TTGCTTATGAAAATTCCATAGTGGTAtttttttGGATTCTTAATGTGTAACTTAAACATA +CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA +AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG +GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG +AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT +TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT +TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACtt +tttttCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG +CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA 
+CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT +TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC +TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT +GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT +AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT +TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT +GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT +TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA +GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA +TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA +TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT +AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT +TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT +ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT +GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA +GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT +GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT +TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT +TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT +AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG +AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC +TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGTTGTT +CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTAAA +GACAAAATAAAGGTTAAAGAGGGCATAAAGGAGCACTCAATTGCAGAGATTTGAGGACAT +TATTTTTATTTTGAGCTTTAAAAAGATGAATAGGTGTTCTCAGGAGGTAGGGATCTGGCT +GAGAGGGAATAATCTGAGCAAAGGTATGAAACAGCCTAATGCATTAGAGAAAAAAGTTCT +TTTAGTAAGGCATTTGGGGTTGGGGAAGCTAGAAAAAGAAATGGGAGCTGGTCACACAGG +GCCTTGTGTGCCAGACTAAGGGGTTTGTAGTATATATTGTAGGCAGAAGAGATCCATCAA +CAGATTGCAAGCAAGGAAGTATGTTCACTTTAAAGTTTGAGAAAGAATAGTGTGGAAGCA +CGTCTCAAATTTAGACTTACTTGTTCCCCCTCTGAACCGTGAATCAGACCATTTCAGGTA +GAAGTCTTCCCCGGTTTATCTGATCTACTCGGGGCCTCAGGCTTCTCAGCTGGGAAGAGA +GGATGCAAGACCAGACTGAAGAACACGGTTGAGTCCCCAGAACCAAAAGGGGGCCTTTCT 
+GCTTCTTAGCCAGCTACCTCTTCGAGTTTTTCAAATTGTGAGGGGGACCATAAAAGGATG +GAAACTTTTAGATGACATTCTACAAATTAtttttttCTTTAAATTAAAAGAACCTAGCCA +ATAAGATAGAGAATGGGCATCTAAGGCATCTCAGAGCTCTCTGATGAAGCCAGGTTGTCA +AAGATCATTTGCAAAAGAAGGGAAAACTGGCATGACAAAAGCTACAGAGAGGAGAGTGAA +ATATAGAAGTGTTTGAAATGTTCAAGCTCACAATAAGCTTAAATTTATAGAAAATGCTAA +GGTTGTCAAGAAGGCttttttttttttcttttttAAACCTGAGGGCAAAAAGGAATGGAT +AAAGTAGTGTAATGGATTGACAATCAGGAAGAACAGAATAACTCAGtttttttttCTCCT +ACAAGGAGATATGGCTGGACCAAAATAAAATGACATGAAATTGCAAAAATGAAAAT +>ENA|M10051|M10051.1 Human insulin receptor mRNA, complete cds +GGGGGGCTGCGCGGCCGGGTCGGTGCGCACACGAGAAGGACGCGCGGCCCCCAGCGCTCT +TGGGGGCCGCCTCGGAGCATGACCCCCGCGGGCCAGCGCCGCGCGCCTGATCCGAGGAGA +CCCCGCGCTCCCGCAGCCAtgggcaccgggggccggcggggggcggcggccgcgccgctg +ctggtggcggtggccgcgctgctactgggcgccgcgggccACCTGTACCCCGGAGAGGTG +TGTCCCGGCATGGATATCCGGAACAACCTCACTAGGTTGCATGAGCTGGAGAATTGCTCT +GTCATCGAAGGACACTTGCAGATACTCTTGATGTTCAAAACGAGGCCCGAAGATTTCCGA +GACCTCAGTTTCCCCAAACTCATCATGATCACTGATTACTTGCTGCTCTTCCGGGTCTAT +GGGCTCGAGAGCCTGAAGGACCTGTTCCCCAACCTCACGGTCATCCGGGGATCACGACTG +TTCTTTAACTACGCGCTGGTCATCTTCGAGATGGTTCACCTCAAGGAACTCGGCCTCTAC +AACCTGATGAACATCACCCGGGGTTCTGTCCGCATCGAGAAGAACAATGAGCTCTGTTAC +TTGGCCACTATCGACTGGTCCCGTATCCTGGATTCCGTGGAGGATAATCACATCGTGTTG +AACAAAGATGACAACGAGGAGTGTGGAGACATCTGTCCGGGTACCGCGAAGGGCAAGACC +AACTGCCCCGCCACCGTCATCAACGGGCAGTTTGTCGAACGATGTTGGACTCATAGTCAC +TGCCAGAAAGTTTGCCCGACCATCTGTAAGTCACACGGCTGCACCGCCGAAGGCCTCTGT +TGCCACAGCGAGTGCCTGGGCAACTGTTCTCAGCCCGACGACCCCACCAAGTGCGTGGCC +TGCCGCAACTTCTACCTGGACGGCAGGTGTGTGGAGACCTGCCCGCCCCCGTACTACCAC +TTCCAGGACTGGCGCTGTGTGAACTTCAGCTTCTGCCAGGACCTGCACCACAAATGCAAG +AACTCGCGGAGGCAGGGCTGCCACCAATACGTCATTCACAACAACAAGTGCATCCCTGAG +TGTCCCTCCGGGTACACGATGAATTCCAGCAACTTGCTGTGCACCCCATGCCTGGGTCCC +TGTCCCAAGGTGTGCCACCTCCTAGAAGGCGAGAAGACCATCGACTCGGTGACGTCTGCC +CAGGAGCTCCGAGGATGCACCGTCATCAACGGGAGTCTGATCATCAACATTCGAGGAGGC +AACAATCTGGCAGCTGAGCTAGAAGCCAACCTCGGCCTCATTGAAGAAATTTCAGGGTAT +CTAAAAATCCGCCGATCCTACGCTCTGGTGTCACTTTCCTTCTTCCGGAAGTTACGTCTG 
+ATTCGAGGAGAGACCTTGGAAATTGGGAACTACTCCTTCTATGCCTTGGACAACCAGAAC +CTAAGGCAGCTCTGGGACTGGAGCAAACACAACCTCACCACCACTCAGGGGAAACTCTTC +TTCCACTATAACCCCAAACTCTGCTTGTCAGAAATCCACAAGATGGAAGAAGTTTCAGGA +ACCAAGGGGCGCCAGGAGAGAAACGACATTGCCCTGAAGACCAATGGGGACAAGGCATCC +TGTGAAAATGAGTTACTTAAATTTTCTTACATTCGGACATCTTTTGACAAGATCTTGCTG +AGATGGGAGCCGTACTGGCCCCCCGACTTCCGAGACCTCTTGGGGTTCATGCTGTTCTAC +AAAGAGGCCCCTTATCAGAATGTGACGGAGTTCGATGGGCAGGATGCGTGTGGTTCCAAC +AGTTGGACGGTGGTAGACATTGACCCACCCCTGAGGTCCAACGACCCCAAATCACAGAAC +CACCCAGGGTGGCTGATGCGGGGTCTCAAGCCCTGGACCCAGTATGCCATCTTTGTGAAG +ACCCTGGTCACCTTTTCGGATGAACGCCGGACCTATGGGGCCAAGAGTGACATCATTTAT +GTCCAGACAGATGCCACCAACCCCTCTGTGCCCCTGGATCCAATCTCAGTGTCTAACTCA +TCATCCCAGATTATTCTGAAGTGGAAACCACCCTCCGACCCCAATGGCAACATCACCCAC +TACCTGGTTTTCTGGGAGAGGCAGGCGGAAGACAGTGAGCTGTTCGAGCTGGATTATTGC +CTCAAAGGGCTGAAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT +CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG +ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT +TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC +CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG +CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG +CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC +TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC +AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT +GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG +AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG +GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT +GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC +TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT +GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT +ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT +TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC +GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC +GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG 
+GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG +GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC +AAGGGCCAGCCCACGCTGGTGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC +CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA +GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG +TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA +ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC +AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC +ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA +CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT +CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA +TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAGGACGACCTG +CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT +GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC +TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC +TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG +ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT +CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT +ACCATGTCCAGTGGAGTTCAGAGATCGTTCCTATACATTTCTGTTCATCTTAAGGTGGAC +TCGTTTGGTTACCAATTTAACTAGTCCTGCAGAGGATTTAACTGTGAACCTGGAGGGCAA +GGGGTTTCCACAGTTGCTGCTCCTTTGGGGCAACGACGGTTTCAAACCAGGATTTTGTGT +TTTTTCGTTccccccacccgcccccAGCAGATGGAAAGAAAGCACCTGTTTTTACAAATT +CttttttttttttttttttttttttttttGCTGGTGTCTGAGCTTCAGTATAAAAGACAA +AACTTCCTGTTTGTGGAACAAAATTTCGAAAGAAAAAACCAAA +>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds +CCAGCTGGAGCCCTGAGTGGCTGAGCTCAGGCCTTCGCAGCATTCTTGGGTGGGAGCAGC +CACGGGTCAGCCACAAGGGCCACAGCCATGAATGGCACAGAAGGCCCTAACTTCTACGTG +CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG +GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC +TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT +CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC 
+ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT +TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG +GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC +CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA +CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC +TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC +TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG +GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC +CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG +GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA +GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG +CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT +GAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG +CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA +GCCATCCCACCAG diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/dustmasker_three_human.maskinfo-asn1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dustmasker_three_human.maskinfo-asn1 Thu Dec 05 06:55:59 2013 -0500 @@ -0,0 +1,64 @@ +Blast-db-mask-info ::= { + algo-id 2, + algo-program dust, + algo-options "window=64; level=20; linker=1", + masks { + masks { + packed-int { + { + from 1447, + to 1495, + id local id 1 + }, + { + from 1540, + to 1552, + id local id 1 + }, + { + from 1886, + to 1892, + id local id 1 + }, + { + from 2278, + to 2284, + id local id 1 + }, + { + from 4409, + to 4415, + id local id 1 + }, + { + from 4635, + to 4653, + id local id 1 + }, + { + from 4726, + to 4734, + id local id 1 + } + }, + packed-int { + { + from 139, + to 219, + id local id 2 + }, + { + from 4569, + to 4584, + id local id 2 + }, + { + from 4621, + to 4648, + id local id 2 + } + } + }, + more FALSE + } +} diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/dustmasker_three_human.maskinfo-asn1-binary Binary file test-data/dustmasker_three_human.maskinfo-asn1-binary has changed diff -r 70e7dcbf6573 -r 4c4a0da938ff 
test-data/four_human_proteins.fasta.log --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins.fasta.log Thu Dec 05 06:55:59 2013 -0500 @@ -0,0 +1,10 @@ + + +Building a new DB, current time: 11/21/2013 11:16:27 +New DB name: /tmp/tmpnSjpCP/tmpwAbNo4/database/files/000/dataset_2_files/blastdb +New DB title: Just 4 human proteins +Sequence type: Protein +Keep Linkouts: T +Keep MBits: T +Maximum file size: 1000000000B +Adding sequences from FASTA; added 4 sequences in 0.00202417 seconds. diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.phd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins.fasta.phd Thu Dec 05 06:55:59 2013 -0500 @@ -0,0 +1,4 @@ +11117184492 +29249033410 +36665887501 +5392473183 diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.phi Binary file test-data/four_human_proteins.fasta.phi has changed diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.phr Binary file test-data/four_human_proteins.fasta.phr has changed diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.pin Binary file test-data/four_human_proteins.fasta.pin has changed diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.pog Binary file test-data/four_human_proteins.fasta.pog has changed diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.psd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins.fasta.psd Thu Dec 05 06:55:59 2013 -0500 @@ -0,0 +1,4 @@ +gnl|bl_ord_id|00 +gnl|bl_ord_id|11 +gnl|bl_ord_id|22 +gnl|bl_ord_id|33 diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.psi Binary file test-data/four_human_proteins.fasta.psi has changed diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.psq Binary file test-data/four_human_proteins.fasta.psq has changed diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/tblastn_four_human_vs_rhodopsin.html 
--- a/test-data/tblastn_four_human_vs_rhodopsin.html Mon Sep 23 06:14:13 2013 -0400 +++ b/test-data/tblastn_four_human_vs_rhodopsin.html Thu Dec 05 06:55:59 2013 -0500 @@ -3,7 +3,7 @@
 
-TBLASTN 2.2.26+
+TBLASTN 2.2.28+
 
 
 Query= sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44
@@ -20,12 +20,12 @@
 
 
 
-Lambda     K      H
-   0.347    0.182    0.684 
+Lambda      K        H        a         alpha
+   0.347    0.182    0.684    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 127710
 
@@ -45,12 +45,12 @@
 
 
 
-Lambda     K      H
-   0.347    0.182    0.684 
+Lambda      K        H        a         alpha
+   0.347    0.182    0.684    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 127710
 
@@ -70,12 +70,12 @@
 
 
 
-Lambda     K      H
-   0.347    0.182    0.684 
+Lambda      K        H        a         alpha
+   0.347    0.182    0.684    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 127710
 
@@ -95,12 +95,12 @@
 
 
 
-Lambda     K      H
-   0.347    0.182    0.684 
+Lambda      K        H        a         alpha
+   0.347    0.182    0.684    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 127710
 
@@ -120,12 +120,12 @@
 
 
 
-Lambda     K      H
-   0.347    0.182    0.684 
+Lambda      K        H        a         alpha
+   0.347    0.182    0.684    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 127710
 
@@ -145,12 +145,12 @@
 
 
 
-Lambda     K      H
-   0.347    0.182    0.684 
+Lambda      K        H        a         alpha
+   0.347    0.182    0.684    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 127710
 
@@ -169,12 +169,12 @@
 
 
 
-Lambda     K      H
-   0.334    0.170    0.615 
+Lambda      K        H        a         alpha
+   0.334    0.170    0.615    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 370988
 
@@ -194,12 +194,12 @@
 
 
 
-Lambda     K      H
-   0.334    0.170    0.615 
+Lambda      K        H        a         alpha
+   0.334    0.170    0.615    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 370988
 
@@ -219,12 +219,12 @@
 
 
 
-Lambda     K      H
-   0.334    0.170    0.615 
+Lambda      K        H        a         alpha
+   0.334    0.170    0.615    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 370988
 
@@ -244,12 +244,12 @@
 
 
 
-Lambda     K      H
-   0.334    0.170    0.615 
+Lambda      K        H        a         alpha
+   0.334    0.170    0.615    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 370988
 
@@ -269,12 +269,12 @@
 
 
 
-Lambda     K      H
-   0.334    0.170    0.615 
+Lambda      K        H        a         alpha
+   0.334    0.170    0.615    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 370988
 
@@ -294,12 +294,12 @@
 
 
 
-Lambda     K      H
-   0.334    0.170    0.615 
+Lambda      K        H        a         alpha
+   0.334    0.170    0.615    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 370988
 
@@ -318,12 +318,12 @@
 
 
 
-Lambda     K      H
-   0.346    0.180    0.700 
+Lambda      K        H        a         alpha
+   0.346    0.180    0.700    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 441350
 
@@ -343,12 +343,12 @@
 
 
 
-Lambda     K      H
-   0.346    0.180    0.700 
+Lambda      K        H        a         alpha
+   0.346    0.180    0.700    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 441350
 
@@ -368,12 +368,12 @@
 
 
 
-Lambda     K      H
-   0.346    0.180    0.700 
+Lambda      K        H        a         alpha
+   0.346    0.180    0.700    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 441350
 
@@ -393,12 +393,12 @@
 
 
 
-Lambda     K      H
-   0.346    0.180    0.700 
+Lambda      K        H        a         alpha
+   0.346    0.180    0.700    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 441350
 
@@ -418,12 +418,12 @@
 
 
 
-Lambda     K      H
-   0.346    0.180    0.700 
+Lambda      K        H        a         alpha
+   0.346    0.180    0.700    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 441350
 
@@ -443,12 +443,12 @@
 
 
 
-Lambda     K      H
-   0.346    0.180    0.700 
+Lambda      K        H        a         alpha
+   0.346    0.180    0.700    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 441350
 
@@ -492,12 +492,12 @@
 
 
 
-Lambda     K      H
-   0.351    0.182    0.707 
+Lambda      K        H        a         alpha
+   0.351    0.182    0.707    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 109230
 
@@ -542,12 +542,12 @@
 
 
 
-Lambda     K      H
-   0.351    0.182    0.707 
+Lambda      K        H        a         alpha
+   0.351    0.182    0.707    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 109230
 
@@ -616,12 +616,12 @@
 
 
 
-Lambda     K      H
-   0.351    0.182    0.707 
+Lambda      K        H        a         alpha
+   0.351    0.182    0.707    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 109230
 
@@ -666,12 +666,12 @@
 
 
 
-Lambda     K      H
-   0.351    0.182    0.707 
+Lambda      K        H        a         alpha
+   0.351    0.182    0.707    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 109230
 
@@ -716,12 +716,12 @@
 
 
 
-Lambda     K      H
-   0.351    0.182    0.707 
+Lambda      K        H        a         alpha
+   0.351    0.182    0.707    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 109230
 
@@ -766,12 +766,12 @@
 
 
 
-Lambda     K      H
-   0.351    0.182    0.707 
+Lambda      K        H        a         alpha
+   0.351    0.182    0.707    0.522     1.92 
 
 Gapped
-Lambda     K      H
-   0.299   0.0710    0.270 
+Lambda      K        H        a         alpha    sigma
+   0.299   0.0710    0.270     1.10     13.8     14.5 
 
 Effective search space used: 109230
 
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/tblastn_four_human_vs_rhodopsin.xml
--- a/test-data/tblastn_four_human_vs_rhodopsin.xml	Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/tblastn_four_human_vs_rhodopsin.xml	Thu Dec 05 06:55:59 2013 -0500
@@ -2,7 +2,7 @@
 
 
   tblastn
-  TBLASTN 2.2.26+
+  TBLASTN 2.2.28+
   Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.
   
   Query_1
@@ -17,706 +17,725 @@
       F
     
   
-  
-    
-      1
-      Query_1
-      sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
-      406
-      
-      
-        
-          0
-          0
-          19
-          127710
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      2
-      Query_1
-      sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
-      406
-      
-      
-        
-          0
-          0
-          19
-          127710
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      3
-      Query_1
-      sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
-      406
-      
-      
-        
-          0
-          0
-          19
-          127710
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      4
-      Query_1
-      sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
-      406
-      
-      
-        
-          0
-          0
-          19
-          127710
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      5
-      Query_1
-      sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
-      406
-      
-      
-        
-          0
-          0
-          19
-          127710
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      6
-      Query_1
-      sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
-      406
-      
-      
-        
-          0
-          0
-          19
-          127710
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      7
-      Query_2
-      sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
-      1161
-      
-      
-        
-          0
-          0
-          23
-          370988
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      8
-      Query_2
-      sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
-      1161
-      
-      
-        
-          0
-          0
-          23
-          370988
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      9
-      Query_2
-      sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
-      1161
-      
-      
-        
-          0
-          0
-          23
-          370988
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      10
-      Query_2
-      sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
-      1161
-      
-      
-        
-          0
-          0
-          23
-          370988
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      11
-      Query_2
-      sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
-      1161
-      
-      
-        
-          0
-          0
-          23
-          370988
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      12
-      Query_2
-      sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
-      1161
-      
-      
-        
-          0
-          0
-          23
-          370988
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      13
-      Query_3
-      sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
-      1382
-      
-      
-        
-          0
-          0
-          24
-          441350
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      14
-      Query_3
-      sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
-      1382
-      
-      
-        
-          0
-          0
-          24
-          441350
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      15
-      Query_3
-      sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
-      1382
-      
-      
-        
-          0
-          0
-          24
-          441350
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      16
-      Query_3
-      sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
-      1382
-      
-      
-        
-          0
-          0
-          24
-          441350
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      17
-      Query_3
-      sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
-      1382
-      
-      
-        
-          0
-          0
-          24
-          441350
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      18
-      Query_3
-      sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
-      1382
-      
-      
-        
-          0
-          0
-          24
-          441350
-          0.071
-          0.299
-          0.27
-        
-      
-      No hits found
-    
-    
-      19
-      Query_4
-      sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
-      348
-      
-        
-          1
-          Subject_1
-          gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA
-          Subject_1
-          1047
-          
-            
-              1
-              732.393
-              1689
-              0
-              1
-              348
-              1
-              1044
-              0
-              1
-              336
-              343
-              0
-              348
-              MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
-              MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA
-              MNGTEGPNFYVPFSN TGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMT+PAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T SKTETSQVAPA
-            
-          
-        
-      
-      
-        
-          0
-          0
-          18
-          109230
-          0.071
-          0.299
-          0.27
-        
-      
-    
-    
-      20
-      Query_4
-      sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
-      348
-      
-        
-          1
-          Subject_2
-          gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds
-          Subject_2
-          1574
-          
-            
-              1
-              646.12
-              1489
-              0
-              1
-              341
-              42
-              1067
-              0
-              3
-              290
-              320
-              1
-              342
-              MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE
-              MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE
-              MNGTEGPNFY+P SN TGVVRSPFEYPQYYLAEPWQ+S+L AYMFLLI+LGFPINF+TLYVT+QHKKLRTPLNYILLNLA A+ FMVL GFT T+Y+S+ GYF+ G TGC +EGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRF ENHA+MGVAFTW+MAL+CA PPL GWSRYIPEG+QCSCG+DYYTLKPEVNNESFVIYMFVVHFTIP+IIIFFCYG+LV TVKEAAAQQQESATTQKAEKEVTRMVIIMV+ FLICWVPYASVAF+IF+ QGS FGPIFMT+PAFFAKS++IYNPVIYIM+NKQFRNCM+TT+CCGKNP G+D+A SA  SKTE
-            
-          
-        
-      
-      
-        
-          0
-          0
-          18
-          109230
-          0.071
-          0.299
-          0.27
-        
-      
-    
-    
-      21
-      Query_4
-      sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
-      348
-      
-        
-          1
-          Subject_3
-          gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds
-          Subject_3
-          4301
-          
-            
-              1
-              151.343
-              342
-              1.39567e-72
-              239
-              312
-              3147
-              3368
-              0
-              3
-              69
-              73
-              0
-              74
-              ESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ
-              ESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ
-              ESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGSNFGPIFMT+PAFFAKS++IYNPVIYIMMNKQ
-            
-            
-              2
-              126.324
-              284
-              1.39567e-72
-              177
-              235
-              2855
-              3031
-              0
-              2
-              54
-              57
-              0
-              59
-              RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA
-              RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS
-              RYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKE  +
-            
-            
-              3
-              229.42
-              523
-              9.34154e-67
-              11
-              121
-              1
-              333
-              0
-              1
-              107
-              109
-              0
-              111
-              VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG
-              VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG
-              VPFSN TGVVRSPFE+PQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGG
-            
-            
-              4
-              122.873
-              276
-              1.03783e-32
-              119
-              177
-              1404
-              1580
-              0
-              3
-              55
-              56
-              0
-              59
-              LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR
-              LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR
-              L GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMG+A TWVMALACAAPPL GWSR
-            
-            
-              5
-              57.7368
-              125
-              1.50808e-12
-              312
-              337
-              4222
-              4299
-              0
-              1
-              23
-              24
-              0
-              26
-              QFRNCMLTTICCGKNPLGDDEASATV
-              QFRNCMLTTLCCGKNPLGDDEASTTA
-              QFRNCMLTT+CCGKNPLGDDEAS T 
-            
-          
-        
-      
-      
-        
-          0
-          0
-          18
-          109230
-          0.071
-          0.299
-          0.27
-        
-      
-    
-    
-      22
-      Query_4
-      sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
-      348
-      
-        
-          1
-          Subject_4
-          gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds
-          Subject_4
-          983
-          
-            
-              1
-              658.198
-              1517
-              0
-              11
-              336
-              1
-              978
-              0
-              1
-              310
-              322
-              0
-              326
-              VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT
-              VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT
-              VPFSN TGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVA+LFMV GGFT+TLYTS+HGYFVFG TGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMG+AFTWVMALACAAPPLAGWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMV+AFLICW+PYASVAFYIFTHQGSNFGP+FMTIPAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T
-            
-          
-        
-      
-      
-        
-          0
-          0
-          18
-          109230
-          0.071
-          0.299
-          0.27
-        
-      
-    
-    
-      23
-      Query_4
-      sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
-      348
-      
-        
-          1
-          Subject_5
-          gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds
-          Subject_5
-          1047
-          
-            
-              1
-              711.256
-              1640
-              0
-              1
-              348
-              1
-              1044
-              0
-              1
-              325
-              337
-              0
-              348
-              MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
-              MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA
-              MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYT   E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA
-            
-          
-        
-      
-      
-        
-          0
-          0
-          18
-          109230
-          0.071
-          0.299
-          0.27
-        
-      
-    
-    
-      24
-      Query_4
-      sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
-      348
-      
-        
-          1
-          Subject_6
-          gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds
-          Subject_6
-          1344
-          
-            
-              1
-              626.708
-              1444
-              0
-              1
-              341
-              23
-              1048
-              0
-              2
-              281
-              311
-              1
-              342
-              MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE
-              MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE
-              MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE HAIMGV  TW MALACA PPL GWSRYIPEGLQCSCGIDYYT  P +NNESFVIYMF  HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR CM+TT+CCGKNP   +D ASAT SKTE
-            
-          
-        
-      
-      
-        
-          0
-          0
-          18
-          109230
-          0.071
-          0.299
-          0.27
-        
-      
-    
-  
-
\ No newline at end of file
+
+
+  1
+  Query_1
+  sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+  406
+
+
+  
+    
+      0
+      0
+      19
+      127710
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  2
+  Query_1
+  sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+  406
+
+
+  
+    
+      0
+      0
+      19
+      127710
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  3
+  Query_1
+  sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+  406
+
+
+  
+    
+      0
+      0
+      19
+      127710
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  4
+  Query_1
+  sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+  406
+
+
+  
+    
+      0
+      0
+      19
+      127710
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  5
+  Query_1
+  sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+  406
+
+
+  
+    
+      0
+      0
+      19
+      127710
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  6
+  Query_1
+  sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+  406
+
+
+  
+    
+      0
+      0
+      19
+      127710
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  7
+  Query_2
+  sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+  1161
+
+
+  
+    
+      0
+      0
+      23
+      370988
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  8
+  Query_2
+  sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+  1161
+
+
+  
+    
+      0
+      0
+      23
+      370988
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  9
+  Query_2
+  sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+  1161
+
+
+  
+    
+      0
+      0
+      23
+      370988
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  10
+  Query_2
+  sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+  1161
+
+
+  
+    
+      0
+      0
+      23
+      370988
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  11
+  Query_2
+  sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+  1161
+
+
+  
+    
+      0
+      0
+      23
+      370988
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  12
+  Query_2
+  sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+  1161
+
+
+  
+    
+      0
+      0
+      23
+      370988
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  13
+  Query_3
+  sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+  1382
+
+
+  
+    
+      0
+      0
+      24
+      441350
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  14
+  Query_3
+  sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+  1382
+
+
+  
+    
+      0
+      0
+      24
+      441350
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  15
+  Query_3
+  sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+  1382
+
+
+  
+    
+      0
+      0
+      24
+      441350
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  16
+  Query_3
+  sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+  1382
+
+
+  
+    
+      0
+      0
+      24
+      441350
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  17
+  Query_3
+  sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+  1382
+
+
+  
+    
+      0
+      0
+      24
+      441350
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  18
+  Query_3
+  sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+  1382
+
+
+  
+    
+      0
+      0
+      24
+      441350
+      0.071
+      0.299
+      0.27
+    
+  
+  No hits found
+
+
+  19
+  Query_4
+  sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+  348
+
+
+  1
+  Subject_1
+  gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA
+  Subject_1
+  1047
+  
+    
+      1
+      732.393
+      1689
+      0
+      1
+      348
+      1
+      1044
+      0
+      1
+      336
+      343
+      0
+      348
+      MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
+      MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA
+      MNGTEGPNFYVPFSN TGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMT+PAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T SKTETSQVAPA
+    
+  
+
+
+  
+    
+      0
+      0
+      18
+      109230
+      0.071
+      0.299
+      0.27
+    
+  
+
+
+  20
+  Query_4
+  sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+  348
+
+
+  1
+  Subject_2
+  gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds
+  Subject_2
+  1574
+  
+    
+      1
+      646.12
+      1489
+      0
+      1
+      341
+      42
+      1067
+      0
+      3
+      290
+      320
+      1
+      342
+      MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE
+      MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE
+      MNGTEGPNFY+P SN TGVVRSPFEYPQYYLAEPWQ+S+L AYMFLLI+LGFPINF+TLYVT+QHKKLRTPLNYILLNLA A+ FMVL GFT T+Y+S+ GYF+ G TGC +EGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRF ENHA+MGVAFTW+MAL+CA PPL GWSRYIPEG+QCSCG+DYYTLKPEVNNESFVIYMFVVHFTIP+IIIFFCYG+LV TVKEAAAQQQESATTQKAEKEVTRMVIIMV+ FLICWVPYASVAF+IF+ QGS FGPIFMT+PAFFAKS++IYNPVIYIM+NKQFRNCM+TT+CCGKNP G+D+A SA  SKTE
+    
+  
+
+
+  
+    
+      0
+      0
+      18
+      109230
+      0.071
+      0.299
+      0.27
+    
+  
+
+
+  21
+  Query_4
+  sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+  348
+
+
+  1
+  Subject_3
+  gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds
+  Subject_3
+  4301
+  
+    
+      1
+      151.343
+      342
+      1.39567e-72
+      239
+      312
+      3147
+      3368
+      0
+      3
+      69
+      73
+      0
+      74
+      ESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ
+      ESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ
+      ESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGSNFGPIFMT+PAFFAKS++IYNPVIYIMMNKQ
+    
+    
+      2
+      126.324
+      284
+      1.39567e-72
+      177
+      235
+      2855
+      3031
+      0
+      2
+      54
+      57
+      0
+      59
+      RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA
+      RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS
+      RYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKE  +
+    
+    
+      3
+      229.42
+      523
+      9.34154e-67
+      11
+      121
+      1
+      333
+      0
+      1
+      107
+      109
+      0
+      111
+      VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG
+      VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG
+      VPFSN TGVVRSPFE+PQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGG
+    
+    
+      4
+      122.873
+      276
+      1.03783e-32
+      119
+      177
+      1404
+      1580
+      0
+      3
+      55
+      56
+      0
+      59
+      LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR
+      LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR
+      L GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMG+A TWVMALACAAPPL GWSR
+    
+    
+      5
+      57.7368
+      125
+      1.50808e-12
+      312
+      337
+      4222
+      4299
+      0
+      1
+      23
+      24
+      0
+      26
+      QFRNCMLTTICCGKNPLGDDEASATV
+      QFRNCMLTTLCCGKNPLGDDEASTTA
+      QFRNCMLTT+CCGKNPLGDDEAS T 
+    
+  
+
+
+  
+    
+      0
+      0
+      18
+      109230
+      0.071
+      0.299
+      0.27
+    
+  
+
+
+  22
+  Query_4
+  sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+  348
+
+
+  1
+  Subject_4
+  gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds
+  Subject_4
+  983
+  
+    
+      1
+      658.198
+      1517
+      0
+      11
+      336
+      1
+      978
+      0
+      1
+      310
+      322
+      0
+      326
+      VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT
+      VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT
+      VPFSN TGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVA+LFMV GGFT+TLYTS+HGYFVFG TGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMG+AFTWVMALACAAPPLAGWSRYIPEG+QCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMI+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMV+AFLICW+PYASVAFYIFTHQGSNFGP+FMTIPAFFAKS++IYNPVIYIMMNKQFRNCMLTT+CCGKNPLGDDEAS T
+    
+  
+
+
+  
+    
+      0
+      0
+      18
+      109230
+      0.071
+      0.299
+      0.27
+    
+  
+
+
+  23
+  Query_4
+  sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+  348
+
+
+  1
+  Subject_5
+  gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds
+  Subject_5
+  1047
+  
+    
+      1
+      711.256
+      1640
+      0
+      1
+      348
+      1
+      1044
+      0
+      1
+      325
+      337
+      0
+      348
+      MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
+      MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA
+      MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYT   E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA
+    
+  
+
+
+  
+    
+      0
+      0
+      18
+      109230
+      0.071
+      0.299
+      0.27
+    
+  
+
+
+  24
+  Query_4
+  sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+  348
+
+
+  1
+  Subject_6
+  gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds
+  Subject_6
+  1344
+  
+    
+      1
+      626.708
+      1444
+      0
+      1
+      341
+      23
+      1048
+      0
+      2
+      281
+      311
+      1
+      342
+      MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE
+      MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE
+      MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE HAIMGV  TW MALACA PPL GWSRYIPEGLQCSCGIDYYT  P +NNESFVIYMF  HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR CM+TT+CCGKNP   +D ASAT SKTE
+    
+  
+
+
+  
+    
+      0
+      0
+      18
+      109230
+      0.071
+      0.299
+      0.27
+    
+  
+
+
+
+
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/tblastn_four_human_vs_rhodopsin_ext.tabular
--- a/test-data/tblastn_four_human_vs_rhodopsin_ext.tabular	Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/tblastn_four_human_vs_rhodopsin_ext.tabular	Thu Dec 05 06:55:59 2013 -0500
@@ -1,10 +1,10 @@
-sp|P08100|OPSD_HUMAN	gi|57163782|ref|NM_001009242.1|	96.55	348	12	0	1	348	1	1044	0.0	 732	gi|57163782|ref|NM_001009242.1|	1689	336	343	0	98.56	0	1	MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA	MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA	348	1047
-sp|P08100|OPSD_HUMAN	gi|2734705|gb|U59921.1|BBU59921	84.80	342	51	1	1	341	42	1067	0.0	 646	gi|2734705|gb|U59921.1|BBU59921	1489	290	320	1	93.57	0	3	MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE	MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE	348	1574
-sp|P08100|OPSD_HUMAN	gi|283855845|gb|GQ290303.1|	93.24	74	5	0	239	312	3147	3368	1e-72	 151	gi|283855845|gb|GQ290303.1|	342	69	73	0	98.65	0	3	ESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ	ESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ	348	4301
-sp|P08100|OPSD_HUMAN	gi|283855845|gb|GQ290303.1|	91.53	59	5	0	177	235	2855	3031	1e-72	 126	gi|283855845|gb|GQ290303.1|	284	54	57	0	96.61	0	2	RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA	RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS	348	4301
-sp|P08100|OPSD_HUMAN	gi|283855845|gb|GQ290303.1|	96.40	111	4	0	11	121	1	333	9e-67	 229	gi|283855845|gb|GQ290303.1|	523	107	109	0	98.20	0	1	VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG	VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG	348	4301
-sp|P08100|OPSD_HUMAN	gi|283855845|gb|GQ290303.1|	93.22	59	4	0	119	177	1404	1580	1e-32	 122	gi|283855845|gb|GQ290303.1|	276	55	56	0	94.92	0	3	LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR	LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR	348	4301
-sp|P08100|OPSD_HUMAN	gi|283855845|gb|GQ290303.1|	88.46	26	3	0	312	337	4222	4299	2e-12	57.7	gi|283855845|gb|GQ290303.1|	125	23	24	0	92.31	0	1	QFRNCMLTTICCGKNPLGDDEASATV	QFRNCMLTTLCCGKNPLGDDEASTTA	348	4301
-sp|P08100|OPSD_HUMAN	gi|283855822|gb|GQ290312.1|	95.09	326	16	0	11	336	1	978	0.0	 658	gi|283855822|gb|GQ290312.1|	1517	310	322	0	98.77	0	1	VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT	VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT	348	983
-sp|P08100|OPSD_HUMAN	gi|18148870|dbj|AB062417.1|	93.39	348	23	0	1	348	1	1044	0.0	 711	gi|18148870|dbj|AB062417.1|	1640	325	337	0	96.84	0	1	MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA	MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA	348	1047
-sp|P08100|OPSD_HUMAN	gi|12583664|dbj|AB043817.1|	82.16	342	60	1	1	341	23	1048	0.0	 626	gi|12583664|dbj|AB043817.1|	1444	281	311	1	90.94	0	2	MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE	MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE	348	1344
+sp|P08100|OPSD_HUMAN	gi|57163782|ref|NM_001009242.1|	96.55	348	12	0	1	348	1	1044	0.0	 732	gi|57163782|ref|NM_001009242.1|	1689	336	343	0	98.56	0	1	MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA	MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA	348	1047	N/A
+sp|P08100|OPSD_HUMAN	gi|2734705|gb|U59921.1|BBU59921	84.80	342	51	1	1	341	42	1067	0.0	 646	gi|2734705|gb|U59921.1|BBU59921	1489	290	320	1	93.57	0	3	MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE	MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE	348	1574	N/A
+sp|P08100|OPSD_HUMAN	gi|283855845|gb|GQ290303.1|	93.24	74	5	0	239	312	3147	3368	1e-72	 151	gi|283855845|gb|GQ290303.1|	342	69	73	0	98.65	0	3	ESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ	ESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ	348	4301	N/A
+sp|P08100|OPSD_HUMAN	gi|283855845|gb|GQ290303.1|	91.53	59	5	0	177	235	2855	3031	1e-72	 126	gi|283855845|gb|GQ290303.1|	284	54	57	0	96.61	0	2	RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA	RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS	348	4301	N/A
+sp|P08100|OPSD_HUMAN	gi|283855845|gb|GQ290303.1|	96.40	111	4	0	11	121	1	333	9e-67	 229	gi|283855845|gb|GQ290303.1|	523	107	109	0	98.20	0	1	VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG	VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG	348	4301	N/A
+sp|P08100|OPSD_HUMAN	gi|283855845|gb|GQ290303.1|	93.22	59	4	0	119	177	1404	1580	1e-32	 122	gi|283855845|gb|GQ290303.1|	276	55	56	0	94.92	0	3	LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR	LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR	348	4301	N/A
+sp|P08100|OPSD_HUMAN	gi|283855845|gb|GQ290303.1|	88.46	26	3	0	312	337	4222	4299	2e-12	57.7	gi|283855845|gb|GQ290303.1|	125	23	24	0	92.31	0	1	QFRNCMLTTICCGKNPLGDDEASATV	QFRNCMLTTLCCGKNPLGDDEASTTA	348	4301	N/A
+sp|P08100|OPSD_HUMAN	gi|283855822|gb|GQ290312.1|	95.09	326	16	0	11	336	1	978	0.0	 658	gi|283855822|gb|GQ290312.1|	1517	310	322	0	98.77	0	1	VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT	VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT	348	983	N/A
+sp|P08100|OPSD_HUMAN	gi|18148870|dbj|AB062417.1|	93.39	348	23	0	1	348	1	1044	0.0	 711	gi|18148870|dbj|AB062417.1|	1640	325	337	0	96.84	0	1	MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA	MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA	348	1047	N/A
+sp|P08100|OPSD_HUMAN	gi|12583664|dbj|AB043817.1|	82.16	342	60	1	1	341	23	1048	0.0	 626	gi|12583664|dbj|AB043817.1|	1444	281	311	1	90.94	0	2	MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE	MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE	348	1344	N/A
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/README.rst
--- a/tools/ncbi_blast_plus/README.rst	Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/README.rst	Thu Dec 05 06:55:59 2013 -0500
@@ -5,12 +5,12 @@
 (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
 See the licence text below.
 
-Currently tested with NCBI BLAST 2.2.26+ (i.e. version 2.2.26 of BLAST+),
-and does not work with the NCBI 'legacy' BLAST suite (e.g. blastall).
+Currently tested with NCBI BLAST 2.2.28+ (i.e. version 2.2.28 of BLAST+),
+and does not work with the NCBI 'legacy' BLAST suite (e.g. ``blastall``).
 
 Note that these wrappers (and the associated datatypes) were originally
 distributed as part of the main Galaxy repository, but as of August 2012
-moved to the Galaxy Tool Shed as 'ncbi_blast_plus' (and 'blast_datatypes').
+moved to the Galaxy Tool Shed as ``ncbi_blast_plus`` (and ``blast_datatypes``).
 My thanks to Dannon Baker from the Galaxy development team for his assistance
 with this.
 
@@ -22,9 +22,9 @@
 ======================
 
 Galaxy should be able to automatically install the dependencies, i.e. the
-'blast_datatypes' repository which defines the BLAST XML file format
-('blastxml') and protein and nucleotide BLAST databases ('blastdbp' and
-'blastdbn').
+``blast_datatypes`` repository which defines the BLAST XML file format
+(``blastxml``) and protein and nucleotide BLAST databases (``blastdbp`` and
+``blastdbn``).
 
 You must tell Galaxy about any system level BLAST databases using configuration
 files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein
@@ -42,9 +42,9 @@
 ===================
 
 For those not using Galaxy's automated installation from the Tool Shed, put
-the XML and Python files in the tools/ncbi_blast_plus/ folder and add the XML
-files to your tool_conf.xml as normal (and do the same in tool_conf.xml.sample
-in order to run the unit tests). For example, use::
+the XML and Python files in the ``tools/ncbi_blast_plus/`` folder and add the
+XML files to your ``tool_conf.xml`` as normal (and do the same in
+``tool_conf.xml.sample`` in order to run the unit tests). For example, use::
 
   
@@ -53,6 +53,7 @@ + @@ -60,18 +61,21 @@
-You will also need to install 'blast_datatypes' from the Tool Shed. This -defines the BLAST XML file format ('blastxml') and protein and nucleotide -BLAST databases composite file formats ('blastdbp' and 'blastdbn'). +You will also need to install ``blast_datatypes`` from the Tool Shed. This +defines the BLAST XML file format (``blastxml``) and protein and nucleotide +BLAST databases composite file formats (``blastdbp`` and ``blastdbn``): + +* http://toolshed.g2.bx.psu.edu/view/devteam/blast_datatypes As described above for an automated installation, you must also tell Galaxy -about any system level BLAST databases using the tool-data/blastdb*.loc files. +about any system level BLAST databases using the ``tool-data/blastdb*.loc`` +files. You must install the NCBI BLAST+ standalone tools somewhere on the system -path. Currently the unit tests are written using "BLAST 2.2.26+". +path. Currently the unit tests are written using "BLAST 2.2.28+". Run the functional tests (adjusting the section identifier to match your -tool_conf.xml.sample file):: +``tool_conf.xml.sample`` file):: ./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools @@ -117,6 +121,21 @@ - Adopted standard MIT License. - Development moved to GitHub, https://github.com/peterjc/galaxy_blast - Updated citation information (Cock et al. 2013). +v0.0.21 - Use macros to simplify the XML wrappers. + - Added wrapper for dustmasker + - Enabled masking for makeblastdb + - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes + defined in updated blast_datatypes on Galaxy ToolShed. + - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26 + - Now depends on package_blast_plus_2_2_27 in ToolShed +v0.0.22 - More use of macros to simplify the wrappers + - Set number of threads via $GALAXY_SLOTS environment variable + - More descriptive default output names + - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18) + - Pre-check for duplicate identifiers in makeblastdb wrapper. 
+ - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27 + - Now depends on package_blast_plus_2_2_28 in ToolShed + - Extended tabular output includes 'salltitles' as column 25. ======= ====================================================================== @@ -140,11 +159,16 @@ For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use the following command from the GitHub repository root folder:: - $ ./ncbi_blast_plus/make_ncbi_blast_plus.sh + $ tools/ncbi_blast_plus/make_ncbi_blast_plus.sh This simplifies ensuring a consistent set of files is bundled each time, including all the relevant test files. +When updating the version of BLAST+, many of the sample data files used for +the unit tests must be regenerated. This script automates that task:: + + $ tools/ncbi_blast_plus/update_test_files.sh + Licence (MIT) ============= diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/blastxml_to_tabular.py --- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Thu Dec 05 06:55:59 2013 -0500 @@ -31,7 +31,7 @@ ====== ============= =========================================== Column NCBI name Description ------ ------------- ------------------------------------------- - 13 sallseqid All subject Seq-id(s), separated by a ';' + 13 sallseqid All subject Seq-id(s), separated by ';' 14 score Raw score 15 nident Number of identical matches 16 positive Number of positive-scoring matches @@ -43,6 +43,7 @@ 22 sseq Aligned part of subject sequence 23 qlen Query sequence length 24 slen Subject sequence length + 25 salltitles All subject titles, separated by '<>' ====== ============= =========================================== Most of these fields are given explicitly in the XML file, others some like @@ -63,7 +64,7 @@ import re if "-v" in sys.argv or "--version" in sys.argv: - print "v0.0.12" + print "v0.0.22" sys.exit(0) if sys.version_info[:2] >= ( 2, 5 ): @@ -89,11 +90,11 @@ if 
out_fmt == "std": extended = False elif out_fmt == "x22": - stop_err("Format argument x22 has been replaced with ext (extended 24 columns)") + stop_err("Format argument x22 has been replaced with ext (extended 25 columns)") elif out_fmt == "ext": extended = True else: - stop_err("Format argument should be std (12 column) or ext (extended 24 columns)") + stop_err("Format argument should be std (12 column) or ext (extended 25 columns), not: %r" % out_fmt) # get an iterable @@ -157,6 +158,11 @@ # Subject_1 # #apparently depending on the parse_deflines switch + # + #Or, with BLAST 2.2.28+ can get this, + # gnl|BL_ORD_ID|2 + # chrIII gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence + # 2 sseqid = hit.findtext("Hit_id").split(None,1)[0] hit_def = sseqid + " " + hit.findtext("Hit_def") if re_default_subject_id.match(sseqid) \ @@ -164,6 +170,11 @@ #Place holder ID, take the first word of the subject definition hit_def = hit.findtext("Hit_def") sseqid = hit_def.split(None,1)[0] + if sseqid.startswith("gnl|BL_ORD_ID|") \ + and sseqid == "gnl|BL_ORD_ID|" + hit.findtext("Hit_accession"): + #Alternative place holder ID, again take the first word of hit_def + hit_def = hit.findtext("Hit_def") + sseqid = hit_def.split(None,1)[0] # for every within for hsp in hit.findall("Hit_hsps/Hsp"): nident = hsp.findtext("Hsp_identity") @@ -228,7 +239,11 @@ ] if extended: - sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(">")) + try: + sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(" >")) + salltitles = "<>".join(name.split(None,1)[1] for name in hit_def.split(" >")) + except IndexError as e: + stop_err("Problem splitting multuple hits?\n%r\n--> %s" % (hit_def, e)) #print hit_def, "-->", sallseqid positive = hsp.findtext("Hsp_positive") ppos = "%0.2f" % (100*float(positive)/float(length)) @@ -252,6 +267,7 @@ h_seq, str(qlen), str(slen), + salltitles, ]) #print "\t".join(values) outfile.write("\t".join(values) + "\n") 
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/blastxml_to_tabular.xml --- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,4 +1,4 @@ - + Convert BLAST XML output to tabular blastxml_to_tabular.py --version @@ -17,7 +17,7 @@ - + @@ -70,6 +70,16 @@ + + + + + + + + + + @@ -104,13 +114,13 @@ but this takes longer to calculate. Most (but not all) of these columns are included by selecting the extended tabular output. The extra columns are included *after* the standard 12 columns. This is so that you can write -workflow filtering steps that accept either the 12 or 22 column tabular -BLAST output. This tool now uses this extended 24 column output by default. +workflow filtering steps that accept either the 12 or 25 column tabular +BLAST output. This tool now uses this extended 25 column output by default. ====== ============= =========================================== Column NCBI name Description ------ ------------- ------------------------------------------- - 13 sallseqid All subject Seq-id(s), separated by a ';' + 13 sallseqid All subject Seq-id(s), separated by ';' 14 score Raw score 15 nident Number of identical matches 16 positive Number of positive-scoring matches @@ -122,6 +132,7 @@ 22 sseq Aligned part of subject sequence 23 qlen Query sequence length 24 slen Subject sequence length + 25 salltitles All subject title(s), separated by '<>' ====== ============= =========================================== Beware that the XML file (and thus the conversion) and the tabular output diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/check_no_duplicates.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/check_no_duplicates.py Thu Dec 05 06:55:59 2013 -0500 @@ -0,0 +1,46 @@ +#!/usr/bin/env python +"""Check for duplicate sequence identifiers in FASTA files. 
+ +This is run as a pre-check before makeblastdb, in order to avoid +a regression bug in BLAST+ 2.2.28 which fails to catch this. See: +http://blastedbio.blogspot.co.uk/2012/10/my-ids-not-good-enough-for-ncbi-blast.html + +This script takes one or more FASTA filenames as input, and +will return a non-zero error if any duplicate identifiers +are found. +""" +import sys +import os + +if "-v" in sys.argv or "--version" in sys.argv: + print("v0.0.22") + sys.exit(0) + +def stop_err(msg, error=1): + sys.stderr.write("%s\n" % msg) + sys.exit(error) + + +identifiers = set() +files = 0 +for filename in sys.argv[1:]: + if not os.path.isfile(filename): + stop_err("Missing FASTA file %r" % filename, 2) + files += 1 + handle = open(filename) + for line in handle: + if line.startswith(">"): + #The split will also take care of the new line character, + #e.g. ">test\n" and ">test description here\n" both give "test" + seq_id = line[1:].split(None, 1)[0] + if seq_id in identifiers: + handle.close() + stop_err("Repeated identifiers, e.g. 
%r" % seq_id, 1) + identifiers.add(seq_id) + handle.close() +if not files: + stop_err("No FASTA files given to check for duplicates", 3) +elif files == 1: + print("%i sequences" % len(identifiers)) +else: + print("%i sequences in %i FASTA files" % (len(identifiers), files)) diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml --- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,46 +1,16 @@ - + Show BLAST database information from blastdbcmd - - blastdbcmd - blast+ - - blastdbcmd -version + + blastdbcmd + ncbi_macros.xml + + blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out "$info" - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - - + @@ -60,17 +30,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. 
-http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,10 +1,10 @@ - + Extract sequence(s) from BLAST database - - blastdbcmd - blast+ - - blastdbcmd -version + + blastdbcmd + ncbi_macros.xml + + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces @@ -47,39 +47,9 @@ | sed 's/>\(lcl|\|gnl|BL_ORD_ID|[0-9]* \)/>/1' > "$seq" #end if - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - - + @@ -132,17 +102,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. 
-http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,91 +1,40 @@ - + Search nucleotide database with nucleotide query sequence(s) - - - blastn - blast+ - - blastn -version + + + blastn + ncbi_macros.xml + + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces blastn -query "$query" -#if $db_opts.db_opts_selector == "db": - -db "${db_opts.database.fields.path}" -#elif $db_opts.db_opts_selector == "histdb": - -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" -#else: - -subject "$db_opts.subject" -#end if +@BLAST_DB_SUBJECT@ -task $blast_type -evalue $evalue_cutoff --out "$output1" -##Set the extended list here so if/when we add things, saved workflows are not affected -#if str($out_format)=="ext": - -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" -#else: - -outfmt $out_format -#end if --num_threads 8 +@BLAST_OUTPUT@ +@THREADS@ #if $adv_opts.adv_opts_selector=="advanced": -$adv_opts.filter_query $adv_opts.strand -## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string -## Note -max_target_seqs overrides -num_descriptions and -num_alignments -#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): --max_target_seqs $adv_opts.max_hits -#end if +@ADVANCED_OPTIONS@ #if (str($adv_opts.identity_cutoff) and float(str($adv_opts.identity_cutoff)) > 0 ): -perc_identity $adv_opts.identity_cutoff #end if -#if (str($adv_opts.word_size) and 
int(str($adv_opts.word_size)) > 0): --word_size $adv_opts.word_size -#end if $adv_opts.ungapped -$adv_opts.parse_deflines ## End of advanced options: #end if - - - - - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + @@ -96,60 +45,26 @@ --> - - - - - - - - - - - - - - - - - + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - + + + - - - - - - - - - - + + @@ -166,12 +81,7 @@ -.. class:: warningmark - -**Note**. Database searches may take a substantial amount of time. -For large input datasets it is advisable to allow overnight processing. - ------ +@SEARCH_TIME_WARNING@ **What it does** @@ -179,71 +89,11 @@ using the NCBI BLAST+ blastn command line tool. Algorithms include blastn, megablast, and discontiguous megablast. -.. class:: warningmark - -You can also search against a FASTA file of subject nucleotide -sequences. This is *not* advised because it is slower (only one -CPU is used), but more importantly gives e-values for pairwise -searches (very small e-values which will look overly signficiant). -In most cases you should instead turn the other FASTA file into a -database first using *makeblastdb* and search against that. +@FASTA_WARNING@ ----- -**Output format** - -Because Galaxy focuses on processing tabular data, the default output of this -tool is tabular. 
The standard BLAST+ tabular output contains 12 columns: - -====== ========= ============================================ -Column NCBI name Description ------- --------- -------------------------------------------- - 1 qseqid Query Seq-id (ID of your sequence) - 2 sseqid Subject Seq-id (ID of the database hit) - 3 pident Percentage of identical matches - 4 length Alignment length - 5 mismatch Number of mismatches - 6 gapopen Number of gap openings - 7 qstart Start of alignment in query - 8 qend End of alignment in query - 9 sstart Start of alignment in subject (database hit) - 10 send End of alignment in subject (database hit) - 11 evalue Expectation value (E-value) - 12 bitscore Bit score -====== ========= ============================================ - -The BLAST+ tools can optionally output additional columns of information, -but this takes longer to calculate. Most (but not all) of these columns are -included by selecting the extended tabular output. The extra columns are -included *after* the standard 12 columns. This is so that you can write -workflow filtering steps that accept either the 12 or 24 column tabular -BLAST output. Galaxy now uses this extended 24 column output by default. 
- -====== ============= =========================================== -Column NCBI name Description ------- ------------- ------------------------------------------- - 13 sallseqid All subject Seq-id(s), separated by a ';' - 14 score Raw score - 15 nident Number of identical matches - 16 positive Number of positive-scoring matches - 17 gaps Total number of gaps - 18 ppos Percentage of positive-scoring matches - 19 qframe Query frame - 20 sframe Subject frame - 21 qseq Aligned part of query sequence - 22 sseq Aligned part of subject sequence - 23 qlen Query sequence length - 24 slen Subject sequence length -====== ============= =========================================== - -The third option is BLAST XML output, which is designed to be parsed by -another program, and is understood by some Galaxy tools. - -You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). -The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. -The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. -The two query anchored outputs show a multiple sequence alignment between the query and all the matches, -and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). +@OUTPUT_FORMAT@ ------- @@ -252,17 +102,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. 
-http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,156 +1,62 @@ - + Search protein database with protein query sequence(s) - - - blastp - blast+ - - blastp -version + + + blastp + ncbi_macros.xml + + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces blastp -query "$query" -#if $db_opts.db_opts_selector == "db": - -db "${db_opts.database.fields.path}" -#elif $db_opts.db_opts_selector == "histdb": - -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" -#else: - -subject "$db_opts.subject" -#end if +@BLAST_DB_SUBJECT@ -task $blast_type -evalue $evalue_cutoff --out "$output1" -##Set the extended list here so if/when we add things, saved workflows are not affected -#if str($out_format)=="ext": - -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" -#else: - -outfmt $out_format -#end if --num_threads 8 +@BLAST_OUTPUT@ +@THREADS@ #if $adv_opts.adv_opts_selector=="advanced": -$adv_opts.filter_query -matrix $adv_opts.matrix -## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string -## Note -max_target_seqs overrides -num_descriptions and -num_alignments -#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): --max_target_seqs $adv_opts.max_hits -#end if -#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0): --word_size $adv_opts.word_size -#end if +@ADVANCED_OPTIONS@ ##Ungapped disabled for now - see comments below ##$adv_opts.ungapped 
-$adv_opts.parse_deflines ## End of advanced options: #end if - - - - - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + - - - - - - - - - - - + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - + + @@ -216,83 +122,18 @@ -.. class:: warningmark - -**Note**. Database searches may take a substantial amount of time. -For large input datasets it is advisable to allow overnight processing. - ------ +@SEARCH_TIME_WARNING@ **What it does** Search a *protein database* using a *protein query*, using the NCBI BLAST+ blastp command line tool. -.. class:: warningmark - -You can also search against a FASTA file of subject protein -sequences. This is *not* advised because it is slower (only one -CPU is used), but more importantly gives e-values for pairwise -searches (very small e-values which will look overly signficiant). -In most cases you should instead turn the other FASTA file into a -database first using *makeblastdb* and search against that. +@FASTA_WARNING@ ----- -**Output format** - -Because Galaxy focuses on processing tabular data, the default output of this -tool is tabular. 
The standard BLAST+ tabular output contains 12 columns: - -====== ========= ============================================ -Column NCBI name Description ------- --------- -------------------------------------------- - 1 qseqid Query Seq-id (ID of your sequence) - 2 sseqid Subject Seq-id (ID of the database hit) - 3 pident Percentage of identical matches - 4 length Alignment length - 5 mismatch Number of mismatches - 6 gapopen Number of gap openings - 7 qstart Start of alignment in query - 8 qend End of alignment in query - 9 sstart Start of alignment in subject (database hit) - 10 send End of alignment in subject (database hit) - 11 evalue Expectation value (E-value) - 12 bitscore Bit score -====== ========= ============================================ - -The BLAST+ tools can optionally output additional columns of information, -but this takes longer to calculate. Most (but not all) of these columns are -included by selecting the extended tabular output. The extra columns are -included *after* the standard 12 columns. This is so that you can write -workflow filtering steps that accept either the 12 or 24 column tabular -BLAST output. Galaxy now uses this extended 24 column output by default. 
- -====== ============= =========================================== -Column NCBI name Description ------- ------------- ------------------------------------------- - 13 sallseqid All subject Seq-id(s), separated by a ';' - 14 score Raw score - 15 nident Number of identical matches - 16 positive Number of positive-scoring matches - 17 gaps Total number of gaps - 18 ppos Percentage of positive-scoring matches - 19 qframe Query frame - 20 sframe Subject frame - 21 qseq Aligned part of query sequence - 22 sseq Aligned part of subject sequence - 23 qlen Query sequence length - 24 slen Subject sequence length -====== ============= =========================================== - -The third option is BLAST XML output, which is designed to be parsed by -another program, and is understood by some Galaxy tools. - -You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). -The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. -The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. -The two query anchored outputs show a multiple sequence alignment between the query and all the matches, -and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). +@OUTPUT_FORMAT@ ------- @@ -301,17 +142,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. 
-http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,173 +1,55 @@ - + Search protein database with translated nucleotide query sequence(s) - - - blastx - blast+ - - blastx -version + + + blastx + ncbi_macros.xml + + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces blastx -query "$query" -#if $db_opts.db_opts_selector == "db": - -db "${db_opts.database.fields.path}" -#elif $db_opts.db_opts_selector == "histdb": - -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" -#else: - -subject "$db_opts.subject" -#end if +@BLAST_DB_SUBJECT@ -query_gencode $query_gencode -evalue $evalue_cutoff --out "$output1" -##Set the extended list here so if/when we add things, saved workflows are not affected -#if str($out_format)=="ext": - -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" -#else: - -outfmt $out_format -#end if --num_threads 8 +@BLAST_OUTPUT@ +@THREADS@ #if $adv_opts.adv_opts_selector=="advanced": -$adv_opts.filter_query $adv_opts.strand -matrix $adv_opts.matrix -## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string -## Note -max_target_seqs overrides -num_descriptions and -num_alignments -#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): --max_target_seqs $adv_opts.max_hits -#end if -#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0): --word_size $adv_opts.word_size -#end if +@ADVANCED_OPTIONS@ $adv_opts.ungapped 
-$adv_opts.parse_deflines ## End of advanced options: #end if - - - - - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + - - - - - - - - - - + + @@ -204,83 +86,18 @@ -.. class:: warningmark - -**Note**. Database searches may take a substantial amount of time. -For large input datasets it is advisable to allow overnight processing. - ------ +@SEARCH_TIME_WARNING@ **What it does** Search a *protein database* using a *translated nucleotide query*, using the NCBI BLAST+ blastx command line tool. -.. class:: warningmark - -You can also search against a FASTA file of subject protein -sequences. This is *not* advised because it is slower (only one -CPU is used), but more importantly gives e-values for pairwise -searches (very small e-values which will look overly signficiant). -In most cases you should instead turn the other FASTA file into a -database first using *makeblastdb* and search against that. +@FASTA_WARNING@ ----- -**Output format** - -Because Galaxy focuses on processing tabular data, the default output of this -tool is tabular. 
The standard BLAST+ tabular output contains 12 columns: - -====== ========= ============================================ -Column NCBI name Description ------- --------- -------------------------------------------- - 1 qseqid Query Seq-id (ID of your sequence) - 2 sseqid Subject Seq-id (ID of the database hit) - 3 pident Percentage of identical matches - 4 length Alignment length - 5 mismatch Number of mismatches - 6 gapopen Number of gap openings - 7 qstart Start of alignment in query - 8 qend End of alignment in query - 9 sstart Start of alignment in subject (database hit) - 10 send End of alignment in subject (database hit) - 11 evalue Expectation value (E-value) - 12 bitscore Bit score -====== ========= ============================================ - -The BLAST+ tools can optionally output additional columns of information, -but this takes longer to calculate. Most (but not all) of these columns are -included by selecting the extended tabular output. The extra columns are -included *after* the standard 12 columns. This is so that you can write -workflow filtering steps that accept either the 12 or 24 column tabular -BLAST output. Galaxy now uses this extended 24 column output by default. 
- -====== ============= =========================================== -Column NCBI name Description ------- ------------- ------------------------------------------- - 13 sallseqid All subject Seq-id(s), separated by a ';' - 14 score Raw score - 15 nident Number of identical matches - 16 positive Number of positive-scoring matches - 17 gaps Total number of gaps - 18 ppos Percentage of positive-scoring matches - 19 qframe Query frame - 20 sframe Subject frame - 21 qseq Aligned part of query sequence - 22 sseq Aligned part of subject sequence - 23 qlen Query sequence length - 24 slen Subject sequence length -====== ============= =========================================== - -The third option is BLAST XML output, which is designed to be parsed by -another program, and is understood by some Galaxy tools. - -You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). -The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. -The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. -The two query anchored outputs show a multiple sequence alignment between the query and all the matches, -and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). +@OUTPUT_FORMAT@ ------- @@ -289,17 +106,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. 
-http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Thu Dec 05 06:55:59 2013 -0500 @@ -0,0 +1,101 @@ + + + masks low complexity regions + + dustmasker + ncbi_macros.xml + + + +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +dustmasker +#if $db_opts.db_opts_selector == "db": + -in "${db_opts.database.fields.path}" -infmt blastdb +#elif $db_opts.db_opts_selector == "histdb": + -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb +#else: + -in "$subject" -infmt fasta +#end if +-out "$outfile" +-window $window -level $level -linker $linker -outfmt $outformat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST algorithm. + +If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool. + +More information about dustmasker can be found in the `BLAST Command Line Applications User Manual`_. + +.. 
_BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/ + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers (a more specific paper covering this wrapper is planned): + +@REFERENCES@ + + diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_macros.xml Thu Dec 05 06:55:59 2013 -0500 @@ -0,0 +1,382 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @BINARY@ + blast+ + + @BINARY@ -version + + + + + + + + + + + + + + -num_threads "\${GALAXY_SLOTS:-8}" + +#if $db_opts.db_opts_selector == "db": + -db "${db_opts.database.fields.path}" +#elif $db_opts.db_opts_selector == "histdb": + -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" +#else: + -subject "$db_opts.subject" +#end if + + -out "$output1" +##Set the extended list here so when we add things, saved workflows are not affected +#if str($out_format)=="ext": + -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles" +#else: + -outfmt $out_format +#end if + + $adv_opts.filter_query +## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string +## Note -max_target_seqs overrides -num_descriptions and -num_alignments +#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): +-max_target_seqs $adv_opts.max_hits +#end if +#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0): +-word_size 
$adv_opts.word_size +#end if +$adv_opts.parse_deflines + + + #if str($db_opts.db_opts_selector)=='db' +${db_opts.database} +#elif str($db_opts.db_opts_selector)=='histdb' +${db_opts.histdb.name} +#else +${db_opts.subject.name} +#end if + +Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). +Galaxy tools and workflows for sequence analysis with applications +in molecular plant pathology. PeerJ 1:e167 +http://dx.doi.org/10.7717/peerj.167 + +Christiam Camacho et al. (2009). +BLAST+: architecture and applications. +BMC Bioinformatics. 15;10:421. +http://dx.doi.org/10.1186/1471-2105-10-421 + +This wrapper is available to install into other Galaxy Instances via the Galaxy +Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus + + **Output format** + +Because Galaxy focuses on processing tabular data, the default output of this +tool is tabular. The standard BLAST+ tabular output contains 12 columns: + +====== ========= ============================================ +Column NCBI name Description +------ --------- -------------------------------------------- + 1 qseqid Query Seq-id (ID of your sequence) + 2 sseqid Subject Seq-id (ID of the database hit) + 3 pident Percentage of identical matches + 4 length Alignment length + 5 mismatch Number of mismatches + 6 gapopen Number of gap openings + 7 qstart Start of alignment in query + 8 qend End of alignment in query + 9 sstart Start of alignment in subject (database hit) + 10 send End of alignment in subject (database hit) + 11 evalue Expectation value (E-value) + 12 bitscore Bit score +====== ========= ============================================ + +The BLAST+ tools can optionally output additional columns of information, +but this takes longer to calculate. Most (but not all) of these columns are +included by selecting the extended tabular output. The extra columns are +included *after* the standard 12 columns. 
This is so that you can write +workflow filtering steps that accept either the 12 or 25 column tabular +BLAST output. Galaxy now uses this extended 25 column output by default. + +====== ============= =========================================== +Column NCBI name Description +------ ------------- ------------------------------------------- + 13 sallseqid All subject Seq-id(s), separated by ';' + 14 score Raw score + 15 nident Number of identical matches + 16 positive Number of positive-scoring matches + 17 gaps Total number of gaps + 18 ppos Percentage of positive-scoring matches + 19 qframe Query frame + 20 sframe Subject frame + 21 qseq Aligned part of query sequence + 22 sseq Aligned part of subject sequence + 23 qlen Query sequence length + 24 slen Subject sequence length + 25 salltitles All subject title(s), separated by '<>' +====== ============= =========================================== + +The third option is BLAST XML output, which is designed to be parsed by +another program, and is understood by some Galaxy tools. + +You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). +The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. +The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. +The two query anchored outputs show a multiple sequence alignment between the query and all the matches, +and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). + + .. class:: warningmark + +You can also search against a FASTA file of subject (target) +sequences. This is *not* advised because it is slower (only one +CPU is used), but more importantly gives e-values for pairwise +searches (very small e-values which will look overly significant). 
+In most cases you should instead turn the other FASTA file into a +database first using *makeblastdb* and search against that. + + .. class:: warningmark + +**Note**. Database searches may take a substantial amount of time. +For large input datasets it is advisable to allow overnight processing. + +----- + + diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_makeblastdb.xml --- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,11 +1,17 @@ - + Make BLAST database - - makeblastdb - blast+ - - makeblastdb -version - + + makeblastdb + ncbi_macros.xml + + + check_no_duplicates.py +##First check for duplicates (since BLAST+ 2.2.28 fails to do so) +##and abort (via the ampersand ampersand trick) if any are found. +#for $i in $in +"${i.file}" +#end for +&& makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}" $parse_seqids $hash_index @@ -24,54 +30,55 @@ -title "BLAST Database" #end if -dbtype $dbtype -## #set $sep = '-mask_data ' -## #for $i in $mask_data -## $sep${i.file} -## #set $set = ', ' -## #end for +#set $mask_string = '' +#set $sep = '-mask_data ' +#for $i in $mask_data +#set $mask_string += $sep + str($i.file) +#set $sep = ',' +#end for +$mask_string +## #set $gi_mask_string = '' ## #set $sep = '-gi_mask -gi_mask_name ' ## #for $i in $gi_mask -## $sep${i.file} -## #set $set = ', ' -## #end for +## #set $gi_mask_string += $sep + str($i.file) +## #set $sep = ',' +## #end for +## $gi_mask_string ## #if $tax.select == 'id': ## -taxid $tax.id ## #else if $tax.select == 'map': ## -taxid_map $tax.map ## #end if +## -------------------------------------------------------------------- +## Capture the stdout log information to the primary file (plain text): +>> "$outfile" - - - - - - - - + + - - + + + + @@ -104,6 +111,25 @@ + + + + + + + + + + + + + + + + + + **What it does** @@ -127,17 +153,6 @@ If you use this Galaxy tool in 
work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. -http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,12 +1,12 @@ - + Search protein domain database (PSSMs) with protein query sequence(s) - - - rpsblast - blast+ - - rpsblast -version + + + rpsblast + ncbi_macros.xml + + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces @@ -18,121 +18,43 @@ -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" #end if -evalue $evalue_cutoff --out "$output1" -##Set the extended list here so if/when we add things, saved workflows are not affected -#if str($out_format)=="ext": - -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" -#else: - -outfmt $out_format -#end if --num_threads 8 +@BLAST_OUTPUT@ +@THREADS@ #if $adv_opts.adv_opts_selector=="advanced": -$adv_opts.filter_query -## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string -## Note -max_target_seqs overrides -num_descriptions and -num_alignments -#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): --max_target_seqs $adv_opts.max_hits -#end if -#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0): --word_size $adv_opts.word_size -#end if -$adv_opts.parse_deflines +@ADVANCED_OPTIONS@ ## End of advanced options: #end if - - - - - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - + + + -.. class:: warningmark - -**Note**. Database searches may take a substantial amount of time. -For large input datasets it is advisable to allow overnight processing. - ------ +@SEARCH_TIME_WARNING@ **What it does** @@ -171,60 +93,7 @@ ----- -**Output format** - -Because Galaxy focuses on processing tabular data, the default output of this -tool is tabular. 
The standard BLAST+ tabular output contains 12 columns: - -====== ========= ============================================ -Column NCBI name Description ------- --------- -------------------------------------------- - 1 qseqid Query Seq-id (ID of your sequence) - 2 sseqid Subject Seq-id (ID of the database hit) - 3 pident Percentage of identical matches - 4 length Alignment length - 5 mismatch Number of mismatches - 6 gapopen Number of gap openings - 7 qstart Start of alignment in query - 8 qend End of alignment in query - 9 sstart Start of alignment in subject (database hit) - 10 send End of alignment in subject (database hit) - 11 evalue Expectation value (E-value) - 12 bitscore Bit score -====== ========= ============================================ - -The BLAST+ tools can optionally output additional columns of information, -but this takes longer to calculate. Most (but not all) of these columns are -included by selecting the extended tabular output. The extra columns are -included *after* the standard 12 columns. This is so that you can write -workflow filtering steps that accept either the 12 or 24 column tabular -BLAST output. Galaxy now uses this extended 24 column output by default. 
- -====== ============= =========================================== -Column NCBI name Description ------- ------------- ------------------------------------------- - 13 sallseqid All subject Seq-id(s), separated by a ';' - 14 score Raw score - 15 nident Number of identical matches - 16 positive Number of positive-scoring matches - 17 gaps Total number of gaps - 18 ppos Percentage of positive-scoring matches - 19 qframe Query frame - 20 sframe Subject frame - 21 qseq Aligned part of query sequence - 22 sseq Aligned part of subject sequence - 23 qlen Query sequence length - 24 slen Subject sequence length -====== ============= =========================================== - -The third option is BLAST XML output, which is designed to be parsed by -another program, and is understood by some Galaxy tools. - -You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). -The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. -The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. -The two query anchored outputs show a multiple sequence alignment between the query and all the matches, -and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). +@OUTPUT_FORMAT@ ------- @@ -233,17 +102,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. 
-http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,12 +1,12 @@ - + Search protein domain database (PSSMs) with translated nucleotide query sequence(s) - - - rpstblastn - blast+ - - rpstblastn -version + + + rpstblastn + ncbi_macros.xml + + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces @@ -18,122 +18,41 @@ -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" #end if -evalue $evalue_cutoff --out "$output1" -## Set the extended list here so if/when we add things, saved workflows are not affected -#if str($out_format)=="ext": - -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" -#else: - -outfmt $out_format -#end if +@BLAST_OUTPUT@ ## rpstblastn does not support multiple threads up to release 2.2.27+. Added in BLAST 2.2.28+. 
##-num_threads 8 #if $adv_opts.adv_opts_selector=="advanced": -$adv_opts.filter_query -## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string -## Note -max_target_seqs overrides -num_descriptions and -num_alignments -#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): --max_target_seqs $adv_opts.max_hits -#end if -#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0): --word_size $adv_opts.word_size -#end if -$adv_opts.parse_deflines +@ADVANCED_OPTIONS@ ## End of advanced options: #end if - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + - - - - - - - - - + -.. class:: warningmark - -**Note**. Database searches may take a substantial amount of time. -For large input datasets it is advisable to allow overnight processing. - ------ +@SEARCH_TIME_WARNING@ **What it does** @@ -172,60 +91,7 @@ ----- -**Output format** - -Because Galaxy focuses on processing tabular data, the default output of this -tool is tabular. The standard BLAST+ tabular output contains 12 columns: - -====== ========= ============================================ -Column NCBI name Description ------- --------- -------------------------------------------- - 1 qseqid Query Seq-id (ID of your sequence) - 2 sseqid Subject Seq-id (ID of the database hit) - 3 pident Percentage of identical matches - 4 length Alignment length - 5 mismatch Number of mismatches - 6 gapopen Number of gap openings - 7 qstart Start of alignment in query - 8 qend End of alignment in query - 9 sstart Start of alignment in subject (database hit) - 10 send End of alignment in subject (database hit) - 11 evalue Expectation value (E-value) - 12 bitscore Bit score -====== ========= ============================================ - -The BLAST+ tools can optionally output additional columns of information, -but this takes longer to calculate. 
Most (but not all) of these columns are -included by selecting the extended tabular output. The extra columns are -included *after* the standard 12 columns. This is so that you can write -workflow filtering steps that accept either the 12 or 24 column tabular -BLAST output. Galaxy now uses this extended 24 column output by default. - -====== ============= =========================================== -Column NCBI name Description ------- ------------- ------------------------------------------- - 13 sallseqid All subject Seq-id(s), separated by a ';' - 14 score Raw score - 15 nident Number of identical matches - 16 positive Number of positive-scoring matches - 17 gaps Total number of gaps - 18 ppos Percentage of positive-scoring matches - 19 qframe Query frame - 20 sframe Subject frame - 21 qseq Aligned part of query sequence - 22 sseq Aligned part of subject sequence - 23 qlen Query sequence length - 24 slen Subject sequence length -====== ============= =========================================== - -The third option is BLAST XML output, which is designed to be parsed by -another program, and is understood by some Galaxy tools. - -You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). -The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. -The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. -The two query anchored outputs show a multiple sequence alignment between the query and all the matches, -and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). +@OUTPUT_FORMAT@ ------- @@ -234,17 +100,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). 
-Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. -http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,173 +1,59 @@ - + Search translated nucleotide database with protein query sequence(s) - - - tblastn - blast+ - - tblastn -version + + + tblastn + ncbi_macros.xml + + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces tblastn -query "$query" -#if $db_opts.db_opts_selector == "db": - -db "${db_opts.database.fields.path}" -#elif $db_opts.db_opts_selector == "histdb": - -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" -#else: - -subject "$db_opts.subject" -#end if +@BLAST_DB_SUBJECT@ -evalue $evalue_cutoff --out "$output1" -##Set the extended list here so if/when we add things, saved workflows are not affected -#if str($out_format)=="ext": - -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" -#else: - -outfmt $out_format -#end if --num_threads 8 +@BLAST_OUTPUT@ +@THREADS@ #if $adv_opts.adv_opts_selector=="advanced": -db_gencode $adv_opts.db_gencode -$adv_opts.filter_query -matrix $adv_opts.matrix -## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string -## Note -max_target_seqs overrides -num_descriptions and -num_alignments -#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): --max_target_seqs $adv_opts.max_hits -#end if -#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0): --word_size $adv_opts.word_size -#end if +@ADVANCED_OPTIONS@ ##Ungapped disabled for now - see comments below ##$adv_opts.ungapped -$adv_opts.parse_deflines ## End of advanced options: #end if - - - - - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - + + @@ -250,83 +136,18 @@ -.. class:: warningmark - -**Note**. Database searches may take a substantial amount of time. -For large input datasets it is advisable to allow overnight processing. - ------ +@SEARCH_TIME_WARNING@ **What it does** Search a *translated nucleotide database* using a *protein query*, using the NCBI BLAST+ tblastn command line tool. -.. 
class:: warningmark - -You can also search against a FASTA file of subject nucleotide -sequences. This is *not* advised because it is slower (only one -CPU is used), but more importantly gives e-values for pairwise -searches (very small e-values which will look overly signficiant). -In most cases you should instead turn the other FASTA file into a -database first using *makeblastdb* and search against that. +@FASTA_WARNING@ ----- -**Output format** - -Because Galaxy focuses on processing tabular data, the default output of this -tool is tabular. The standard BLAST+ tabular output contains 12 columns: - -====== ========= ============================================ -Column NCBI name Description ------- --------- -------------------------------------------- - 1 qseqid Query Seq-id (ID of your sequence) - 2 sseqid Subject Seq-id (ID of the database hit) - 3 pident Percentage of identical matches - 4 length Alignment length - 5 mismatch Number of mismatches - 6 gapopen Number of gap openings - 7 qstart Start of alignment in query - 8 qend End of alignment in query - 9 sstart Start of alignment in subject (database hit) - 10 send End of alignment in subject (database hit) - 11 evalue Expectation value (E-value) - 12 bitscore Bit score -====== ========= ============================================ - -The BLAST+ tools can optionally output additional columns of information, -but this takes longer to calculate. Most (but not all) of these columns are -included by selecting the extended tabular output. The extra columns are -included *after* the standard 12 columns. This is so that you can write -workflow filtering steps that accept either the 12 or 24 column tabular -BLAST output. Galaxy now uses this extended 24 column output by default. 
- -====== ============= =========================================== -Column NCBI name Description ------- ------------- ------------------------------------------- - 13 sallseqid All subject Seq-id(s), separated by a ';' - 14 score Raw score - 15 nident Number of identical matches - 16 positive Number of positive-scoring matches - 17 gaps Total number of gaps - 18 ppos Percentage of positive-scoring matches - 19 qframe Query frame - 20 sframe Subject frame - 21 qseq Aligned part of query sequence - 22 sseq Aligned part of subject sequence - 23 qlen Query sequence length - 24 slen Subject sequence length -====== ============= =========================================== - -The third option is BLAST XML output, which is designed to be parsed by -another program, and is understood by some Galaxy tools. - -You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). -The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. -The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. -The two query anchored outputs show a multiple sequence alignment between the query and all the matches, -and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). +@OUTPUT_FORMAT@ ------- @@ -335,17 +156,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. 
-http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,193 +1,59 @@ - + Search translated nucleotide database with translated nucleotide query sequence(s) - - - tblastx - blast+ - - tblastx -version + + + tblastx + ncbi_macros.xml + + ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces tblastx -query "$query" -#if $db_opts.db_opts_selector == "db": - -db "${db_opts.database.fields.path}" -#elif $db_opts.db_opts_selector == "histdb": - -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}" -#else: - -subject "$db_opts.subject" -#end if +@BLAST_DB_SUBJECT@ -query_gencode $query_gencode -evalue $evalue_cutoff --out "$output1" -##Set the extended list here so if/when we add things, saved workflows are not affected -#if str($out_format)=="ext": - -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen" -#else: - -outfmt $out_format -#end if --num_threads 8 +@BLAST_OUTPUT@ +@THREADS@ #if $adv_opts.adv_opts_selector=="advanced": -db_gencode $adv_opts.db_gencode -$adv_opts.filter_query $adv_opts.strand -matrix $adv_opts.matrix ## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string ## Note -max_target_seqs overrides -num_descriptions and -num_alignments -#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): --max_target_seqs $adv_opts.max_hits -#end if -#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0): --word_size $adv_opts.word_size -#end if 
-$adv_opts.parse_deflines +@ADVANCED_OPTIONS@ ## End of advanced options: #end if - - - - - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + - - - - - - - - - - + + @@ -204,83 +70,18 @@ -.. class:: warningmark - -**Note**. Database searches may take a substantial amount of time. -For large input datasets it is advisable to allow overnight processing. - ------ +@SEARCH_TIME_WARNING@ **What it does** Search a *translated nucleotide database* using a *protein query*, using the NCBI BLAST+ tblastx command line tool. -.. class:: warningmark - -You can also search against a FASTA file of subject nucleotide -sequences. This is *not* advised because it is slower (only one -CPU is used), but more importantly gives e-values for pairwise -searches (very small e-values which will look overly signficiant). -In most cases you should instead turn the other FASTA file into a -database first using *makeblastdb* and search against that. +@FASTA_WARNING@ ----- -**Output format** - -Because Galaxy focuses on processing tabular data, the default output of this -tool is tabular. 
The standard BLAST+ tabular output contains 12 columns: - -====== ========= ============================================ -Column NCBI name Description ------- --------- -------------------------------------------- - 1 qseqid Query Seq-id (ID of your sequence) - 2 sseqid Subject Seq-id (ID of the database hit) - 3 pident Percentage of identical matches - 4 length Alignment length - 5 mismatch Number of mismatches - 6 gapopen Number of gap openings - 7 qstart Start of alignment in query - 8 qend End of alignment in query - 9 sstart Start of alignment in subject (database hit) - 10 send End of alignment in subject (database hit) - 11 evalue Expectation value (E-value) - 12 bitscore Bit score -====== ========= ============================================ - -The BLAST+ tools can optionally output additional columns of information, -but this takes longer to calculate. Most (but not all) of these columns are -included by selecting the extended tabular output. The extra columns are -included *after* the standard 12 columns. This is so that you can write -workflow filtering steps that accept either the 12 or 24 column tabular -BLAST output. Galaxy now uses this extended 24 column output by default. 
- -====== ============= =========================================== -Column NCBI name Description ------- ------------- ------------------------------------------- - 13 sallseqid All subject Seq-id(s), separated by a ';' - 14 score Raw score - 15 nident Number of identical matches - 16 positive Number of positive-scoring matches - 17 gaps Total number of gaps - 18 ppos Percentage of positive-scoring matches - 19 qframe Query frame - 20 sframe Subject frame - 21 qseq Aligned part of query sequence - 22 sseq Aligned part of subject sequence - 23 qlen Query sequence length - 24 slen Subject sequence length -====== ============= =========================================== - -The third option is BLAST XML output, which is designed to be parsed by -another program, and is understood by some Galaxy tools. - -You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program). -The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website. -The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query. -The two query anchored outputs show a multiple sequence alignment between the query and all the matches, -and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences). +@OUTPUT_FORMAT@ ------- @@ -289,17 +90,6 @@ If you use this Galaxy tool in work leading to a scientific publication please cite the following papers: -Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). -Galaxy tools and workflows for sequence analysis with applications -in molecular plant pathology. PeerJ 1:e167 -http://dx.doi.org/10.7717/peerj.167 - -Christiam Camacho et al. (2009). -BLAST+: architecture and applications. -BMC Bioinformatics. 15;10:421. 
-http://dx.doi.org/10.1186/1471-2105-10-421 - -This wrapper is available to install into other Galaxy Instances via the Galaxy -Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus +@REFERENCES@ diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/repository_dependencies.xml --- a/tools/ncbi_blast_plus/repository_dependencies.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/repository_dependencies.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,4 +1,4 @@ - + diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/tool_dependencies.xml --- a/tools/ncbi_blast_plus/tool_dependencies.xml Mon Sep 23 06:14:13 2013 -0400 +++ b/tools/ncbi_blast_plus/tool_dependencies.xml Thu Dec 05 06:55:59 2013 -0500 @@ -1,6 +1,6 @@ - - + +