Mercurial > repos > devteam > ncbi_blast_plus
changeset 13:623f727cdff1 draft
Uploaded v0.1.00, uses BLAST+ 2.2.29, allows custom column selection for tabular output - including taxonomy fields.
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_rhodopsin_vs_three_human.columns.tabular Fri Mar 14 07:40:46 2014 -0400 @@ -0,0 +1,7 @@ +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 4301 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 4301 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 4301 1213 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 4301 1213 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 983 1213 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1047 1213
--- a/test-data/blastn_rhodopsin_vs_three_human.tabular Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/blastn_rhodopsin_vs_three_human.tabular Fri Mar 14 07:40:46 2014 -0400 @@ -1,7 +1,7 @@ -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 9e-133 460 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 7e-94 331 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 8e-74 265 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 8e-69 248 -gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323 -gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 4e-132 460 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 3e-93 331 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 3e-73 265 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 3e-68 248 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_rhodopsin_vs_three_human.xml Fri Mar 14 07:40:46 2014 -0400 @@ -0,0 +1,549 @@ +<?xml version="1.0"?> +<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"> +<BlastOutput> + <BlastOutput_program>blastn</BlastOutput_program> + <BlastOutput_version>BLASTN 2.2.29+</BlastOutput_version> + <BlastOutput_reference>Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), "A greedy algorithm for aligning DNA sequences", J Comput Biol 2000; 7(1-2):203-14.</BlastOutput_reference> + <BlastOutput_db></BlastOutput_db> + <BlastOutput_query-ID>Query_1</BlastOutput_query-ID> + <BlastOutput_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</BlastOutput_query-def> + <BlastOutput_query-len>1047</BlastOutput_query-len> + <BlastOutput_param> + <Parameters> + <Parameters_expect>1e-40</Parameters_expect> + <Parameters_sc-match>1</Parameters_sc-match> + <Parameters_sc-mismatch>-2</Parameters_sc-mismatch> + <Parameters_gap-open>0</Parameters_gap-open> + <Parameters_gap-extend>0</Parameters_gap-extend> + <Parameters_filter>L;m;</Parameters_filter> + </Parameters> + </BlastOutput_param> +<BlastOutput_iterations> +<Iteration> + <Iteration_iter-num>1</Iteration_iter-num> + <Iteration_query-ID>Query_1</Iteration_query-ID> + <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def> + <Iteration_query-len>1047</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4933992</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits 
found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>2</Iteration_iter-num> + <Iteration_query-ID>Query_1</Iteration_query-ID> + <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def> + <Iteration_query-len>1047</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4933992</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>3</Iteration_iter-num> + <Iteration_query-ID>Query_1</Iteration_query-ID> + <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def> + <Iteration_query-len>1047</Iteration_query-len> +<Iteration_hits> +<Hit> + <Hit_num>1</Hit_num> + <Hit_id>Subject_3</Hit_id> + <Hit_def>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds</Hit_def> + <Hit_accession>Subject_3</Hit_accession> + <Hit_len>1213</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>1474.75</Hsp_bit-score> + <Hsp_score>798</Hsp_score> + <Hsp_evalue>0</Hsp_evalue> + <Hsp_query-from>1</Hsp_query-from> + <Hsp_query-to>1047</Hsp_query-to> + <Hsp_hit-from>88</Hsp_hit-from> + <Hsp_hit-to>1134</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + <Hsp_identity>964</Hsp_identity> + <Hsp_positive>964</Hsp_positive> + <Hsp_gaps>0</Hsp_gaps> + <Hsp_align-len>1047</Hsp_align-len> + 
<Hsp_qseq>ATGAACGGGACGGAGGGCCCGAACTTCTACGTGCCCTTCTCCAACAAAACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTCCTGCTCATCGTGCTTGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACGGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTGGCCGTGGCTGACCTCTTCATGGTCTTCGGTGGCTTCACCACCACCCTCTACACCTCTCTGCATGGATACTTTGTCTTTGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACACTGGGCGGTGAAATTGCCCTGTGGTCTTTGGTGGTCCTGGCCATTGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTTGGGGAGAACCATGCCATAATGGGCGTCGCTTTCACCTGGGTCATGGCACTGGCCTGCGCTGCACCCCCCCTCGTTGGTTGGTCCAGGTACATCCCTGAAGGCATGCAGTGTTCATGCGGGATCGACTACTACACACTCAAGCCAGAAGTCAACAACGAGTCCTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATCGTCATCTTCTTTTGCTACGGGCAGCTTGTCTTCACAGTCAAGGAGGCGGCAGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCTGAGAAGGAGGTCACTCGCATGGTCATCATCATGGTCATTGCTTTCCTGATCTGTTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGGTCCAACTTTGGCCCCATCTTCATGACACTCCCGGCGTTCTTCGCAAAGTCCTCCTCCATCTACAACCCTGTCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACTACCCTCTGCTGTGGCAAGAACCCACTGGGTGATGACGAGGCTTCCACAACCGGTTCCAAGACGGAGACCAGCCAGGTGGCACCGGCCTAA</Hsp_qseq> + 
<Hsp_hseq>ATGAATGGCACAGAAGGCCCTAACTTCTACGTGCCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTCACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCACTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTACTACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAGGAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACCCGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCAGCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGATGAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAA</Hsp_hseq> + <Hsp_midline>||||| || || || ||||| ||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ||||| |||||||| ||||||||||||||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||||||||| |||||||||||||||||||||||| | ||||||||||||| |||||||||||||||||||||||||||||| ||||| |||||||||||||||||||||||||||||||||||||| |||||||||||||||||||||||||| ||||||||||||||||| |||||||||||||||||||||||||||||||||||||||||||| ||||||||||||||||| |||||||| || ||||||||||||||||| ||||||||||| |||||||| |||| || ||||||||||||||||| || ||| ||||||| || || || |||||||||||||| |||||||| || |||||||||||||| |||||||||||||||||||||||||||||||||||||||||||| ||||||| || ||||| |||||||| |||||||| ||||||||||| || |||||||||||||||||||||||||| |||||||| |||||||||||||| ||||||||||||||||||||||| |||||||||||||| ||||||||||||||||||||||||||||||||||||||||||||||| |||||||| || |||||||||||||| |||| |||||||| || ||| | || |||||||||||||||||||||| 
|||||||||||||||||||||||||||||||||||||| ||| ||||||| |||||||||||||||||||| || ||||| || | |||| |||||||||||||| ||||||||||| |||||||||</Hsp_midline> + </Hsp> + </Hit_hsps> +</Hit> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4933992</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> +</Iteration> +<Iteration> + <Iteration_iter-num>4</Iteration_iter-num> + <Iteration_query-ID>Query_2</Iteration_query-ID> + <Iteration_query-def>gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds</Iteration_query-def> + <Iteration_query-len>1574</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>7453579</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>5</Iteration_iter-num> + <Iteration_query-ID>Query_2</Iteration_query-ID> + <Iteration_query-def>gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds</Iteration_query-def> + <Iteration_query-len>1574</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>7453579</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> 
+ <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>6</Iteration_iter-num> + <Iteration_query-ID>Query_2</Iteration_query-ID> + <Iteration_query-def>gi|2734705|gb|U59921.1|BBU59921 Bufo bufo rhodopsin mRNA, complete cds</Iteration_query-def> + <Iteration_query-len>1574</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>7453579</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>7</Iteration_iter-num> + <Iteration_query-ID>Query_3</Iteration_query-ID> + <Iteration_query-def>gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds</Iteration_query-def> + <Iteration_query-len>4301</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>16</Statistics_hsp-len> + <Statistics_eff-space>20482300</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>8</Iteration_iter-num> + <Iteration_query-ID>Query_3</Iteration_query-ID> + <Iteration_query-def>gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 
and partial cds</Iteration_query-def> + <Iteration_query-len>4301</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>16</Statistics_hsp-len> + <Statistics_eff-space>20482300</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>9</Iteration_iter-num> + <Iteration_query-ID>Query_3</Iteration_query-ID> + <Iteration_query-def>gi|283855845|gb|GQ290303.1| Cynopterus brachyotis voucher 20020434 rhodopsin (RHO) gene, exons 1 through 5 and partial cds</Iteration_query-def> + <Iteration_query-len>4301</Iteration_query-len> +<Iteration_hits> +<Hit> + <Hit_num>1</Hit_num> + <Hit_id>Subject_3</Hit_id> + <Hit_def>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds</Hit_def> + <Hit_accession>Subject_3</Hit_accession> + <Hit_len>1213</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>460.936</Hsp_bit-score> + <Hsp_score>249</Hsp_score> + <Hsp_evalue>3.59583e-132</Hsp_evalue> + <Hsp_query-from>1</Hsp_query-from> + <Hsp_query-to>333</Hsp_query-to> + <Hsp_hit-from>118</Hsp_hit-from> + <Hsp_hit-to>450</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + <Hsp_identity>305</Hsp_identity> + <Hsp_positive>305</Hsp_positive> + <Hsp_gaps>0</Hsp_gaps> + <Hsp_align-len>333</Hsp_align-len> + 
<Hsp_qseq>GTGCCCTTCTCCAACAAGACAGGCGTGGTGCGCAGTCCCTTCGAGCATCCACAGTACTACCTGGCCGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTCGGCTTCCCCATCAACTTCCTCACGCTCTATGTCACGGTTCAGCACAAGAAGCTGCGTACGCCTCTCAACTACATCCTGCTCAACCTGGCCGTGGCCGACCTCTTCATGGTCTTCGGAGGCTTCACCACCACCCTCTACACCTCCCTGCATGGATACTTTGTCTTCGGGCCTACGGGATGCAATCTGGAGGGCTTTTTTGCCACCCTGGGAGGT</Hsp_qseq> + <Hsp_hseq>GTGCCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTCACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACCCTGGGCGGT</Hsp_hseq> + <Hsp_midline>|||||||||||||| ||| || ||||| ||||| ||||||||| | ||||||||||||||||| ||||||||||||||||||||||||||||||||||||||||||||||||||||| ||||||||||||||||||||||||||||| ||||| || ||||||||||||||||| ||||||||||||||||||||||||||||| |||||||| ||||||||||||||| | || |||||||||| ||||||||||||||| |||||||||||||| ||||||||||| || ||||||||| |||||||||| |||||||||||||| |||</Hsp_midline> + </Hsp> + <Hsp> + <Hsp_num>2</Hsp_num> + <Hsp_bit-score>331.671</Hsp_bit-score> + <Hsp_score>179</Hsp_score> + <Hsp_evalue>2.94161e-93</Hsp_evalue> + <Hsp_query-from>3127</Hsp_query-from> + <Hsp_query-to>3368</Hsp_query-to> + <Hsp_hit-from>782</Hsp_hit-from> + <Hsp_hit-to>1023</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + <Hsp_identity>222</Hsp_identity> + <Hsp_positive>222</Hsp_positive> + <Hsp_gaps>2</Hsp_gaps> + <Hsp_align-len>243</Hsp_align-len> + <Hsp_qseq>AGGCAGCTGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCCGAGAAGGAGGTCACCCGTATGGTCATCATCATGGTCATTGCTTTCCTAATCTGTTGGCTGCCGTATGCCGGCGTGGCATTCTACATCTTCACCCACCAGGGCTCTAACTTTGGCCCCATCTTCATGACCCTCCCGGCATTCTTTGCCAAG-TCGTCCTCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAG</Hsp_qseq> + 
<Hsp_hseq>AGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACCCGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCAGCGTTCTTTGCCAAGAGCG-CCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAG</Hsp_hseq> + <Hsp_midline>|||| ||||||||||||||||||||||||||||| |||||||| ||||||||||||||||| |||||||||||||||||||| |||||||| ||||| ||| |||| || ||| |||||||||||||||||||||||||||||||||| ||||| || ||||||||||||||| |||| || |||||||||||| || || |||||||||||||||||||||||||||||||||||||||||</Hsp_midline> + </Hsp> + <Hsp> + <Hsp_num>3</Hsp_num> + <Hsp_bit-score>265.191</Hsp_bit-score> + <Hsp_score>143</Hsp_score> + <Hsp_evalue>3.02604e-73</Hsp_evalue> + <Hsp_query-from>1410</Hsp_query-from> + <Hsp_query-to>1582</Hsp_query-to> + <Hsp_hit-from>448</Hsp_hit-from> + <Hsp_hit-to>620</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + <Hsp_identity>163</Hsp_identity> + <Hsp_positive>163</Hsp_positive> + <Hsp_gaps>0</Hsp_gaps> + <Hsp_align-len>173</Hsp_align-len> + <Hsp_qseq>GGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTATGCAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCCTTGCCCTCACCTGGGTCATGGCACTGGCCTGCGCCGCGCCCCCGCTAGTCGGCTGGTCCAGGTA</Hsp_qseq> + <Hsp_hseq>GGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCACTCGCCGGCTGGTCCAGGTA</Hsp_hseq> + <Hsp_midline>|||||||||||||||||||||||||||||||||||||||||||||||||||||||| || ||||||||||||||||||||||||||||||||||| |||||||||||| ||||| |||||||||||||||| |||||||||||||| ||||| || | |||||||||||||||</Hsp_midline> + </Hsp> + <Hsp> + <Hsp_num>4</Hsp_num> + <Hsp_bit-score>248.571</Hsp_bit-score> + <Hsp_score>134</Hsp_score> + <Hsp_evalue>3.04752e-68</Hsp_evalue> + <Hsp_query-from>2854</Hsp_query-from> + <Hsp_query-to>3023</Hsp_query-to> + <Hsp_hit-from>615</Hsp_hit-from> + <Hsp_hit-to>784</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + 
<Hsp_identity>158</Hsp_identity> + <Hsp_positive>158</Hsp_positive> + <Hsp_gaps>0</Hsp_gaps> + <Hsp_align-len>170</Hsp_align-len> + <Hsp_qseq>CAGGTACATCCCAGAGGGCATGCAGTGCTCATGTGGAATCGACTACTACACCCTCAAGCCGGAGGTCAACAACGAGTCCTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCTATGATTGTCATATTCTTTTGCTATGGACAGCTGGTCTTCACCGTCAAGGAGG</Hsp_qseq> + <Hsp_hseq>CAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTACTACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAGGAGG</Hsp_hseq> + <Hsp_midline>|||||||||||| |||||| |||||||||| |||||||||||||||||||| |||||||||||||||||||||||||| |||||||||||||||||||||||||||||||||||||| |||||| |||| || || |||||||| ||||| |||||||||||||||||||</Hsp_midline> + </Hsp> + </Hit_hsps> +</Hit> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>16</Statistics_hsp-len> + <Statistics_eff-space>20482300</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> +</Iteration> +<Iteration> + <Iteration_iter-num>10</Iteration_iter-num> + <Iteration_query-ID>Query_4</Iteration_query-ID> + <Iteration_query-def>gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds</Iteration_query-def> + <Iteration_query-len>983</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4628008</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + 
<Iteration_iter-num>11</Iteration_iter-num> + <Iteration_query-ID>Query_4</Iteration_query-ID> + <Iteration_query-def>gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds</Iteration_query-def> + <Iteration_query-len>983</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4628008</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>12</Iteration_iter-num> + <Iteration_query-ID>Query_4</Iteration_query-ID> + <Iteration_query-def>gi|283855822|gb|GQ290312.1| Myotis ricketti voucher GQX10 rhodopsin (RHO) mRNA, partial cds</Iteration_query-def> + <Iteration_query-len>983</Iteration_query-len> +<Iteration_hits> +<Hit> + <Hit_num>1</Hit_num> + <Hit_id>Subject_3</Hit_id> + <Hit_def>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds</Hit_def> + <Hit_accession>Subject_3</Hit_accession> + <Hit_len>1213</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>1323.32</Hsp_bit-score> + <Hsp_score>716</Hsp_score> + <Hsp_evalue>0</Hsp_evalue> + <Hsp_query-from>1</Hsp_query-from> + <Hsp_query-to>959</Hsp_query-to> + <Hsp_hit-from>118</Hsp_hit-from> + <Hsp_hit-to>1076</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + <Hsp_identity>878</Hsp_identity> + <Hsp_positive>878</Hsp_positive> + <Hsp_gaps>0</Hsp_gaps> + <Hsp_align-len>959</Hsp_align-len> + 
<Hsp_qseq>GTGCCCTTCTCCAACAAGACGGGTGTGGTGCGCAGCCCCTTCGAGTACCCGCAGTACTACCTGGCTGAGCCCTGGCAGTTCTCCATGCTGGCTGCCTACATGTTTCTGCTGATCGTGCTCGGATTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTGGCTGTGGCCAACCTCTTCATGGTCTTTGGAGGCTTCACCACCACCCTGTATACCTCTATGCATGGATACTTCGTCTTCGGGGCCACGGGATGCAATCTGGAGGGCTTCTTTGCCACGCTGGGCGGTGAAATCGCCCTGTGGTCCCTGGTGGTCCTGGCCATCGAGCGGTATGTGGTGGTCTGCAAGCCCATGAGCAACTTCCGCTTTGGGGAGAACCACGCCATCATGGGCCTCGCCTTCACGTGGGTCATGGCACTGGCCTGCGCTGCACCCCCACTAGCCGGCTGGTCCAGGTACATCCCAGAGGGCATGCAGTGCTCGTGTGGGATTGACTACTACACGCTCAAACCGGAGGTCAACAACGAGTCCTTCGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTGTCATTTTCTTCTGCTACGGACAGCTGGTGTTCACAGTGAAGGAGGCGGCTGCCCAGCAGCAGGAGTCAGCCACCACCCAGAAGGCCGAGAAGGAAGTCACGCGCATGGTCATCATCATGGTCGTTGCGTTCCTAATCTGTTGGCTGCCCTACGCCAGCGTGGCATTCTACATCTTTACCCACCAGGGCTCTAACTTTGGCCCTGTCTTCATGACCATCCCGGCATTCTTCGCCAAGTCATCCTCCATCTACAACCCGGTCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCCTCTGCTGTGGCAAGAACCCACTGGGTGA</Hsp_qseq> + <Hsp_hseq>GTGCCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTCACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCACTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTACTACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAGGAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACCCGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCAGCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGA</Hsp_hseq> + <Hsp_midline>|||||||||||||| 
|||||||||||| |||||||||||||||||||| |||||||||||||||||||| |||||||||||||||||||| |||||||||||||||||||||||||| || ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| || ||||| |||||||||||||| | || |||||||||| |||||| || |||||| ||||||||||||||||||||||| |||| ||||||||| ||||||||||||||||||| |||||||||||||| |||||||||||| ||||||||||||||||||||||||| |||||||| || ||||||||||||||||||||||| ||||||||||| |||||||||||| | |||||||| ||||||||||| ||||||||||| ||||||||||| ||||||||||||||||||||||| |||||| |||||||||||||||| || ||||||||||||||||| |||||||||||||||||||| || |||||||||||||||||||||||||||||||||||||||||| |||| || |||||||| || ||||| || ||||| || |||||||| ||||||||||||||||||||||||||||| |||||||| |||||||| ||||| ||||||||||||||||||||| | || ||||| ||||| ||| ||||||||||||||||||||||||||||||| |||||||||||||| ||||| || || |||||||||||||||| || ||||| |||||| || ||||||||||||| ||||||||||||||||||||||||||||||||||||||||||||||||||| ||||||| ||||||||||||||||||||</Hsp_midline> + </Hsp> + </Hit_hsps> +</Hit> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4628008</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> +</Iteration> +<Iteration> + <Iteration_iter-num>13</Iteration_iter-num> + <Iteration_query-ID>Query_5</Iteration_query-ID> + <Iteration_query-def>gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds</Iteration_query-def> + <Iteration_query-len>1047</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4933992</Statistics_eff-space> + 
<Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>14</Iteration_iter-num> + <Iteration_query-ID>Query_5</Iteration_query-ID> + <Iteration_query-def>gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds</Iteration_query-def> + <Iteration_query-len>1047</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4933992</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>15</Iteration_iter-num> + <Iteration_query-ID>Query_5</Iteration_query-ID> + <Iteration_query-def>gi|18148870|dbj|AB062417.1| Synthetic construct Bos taurus gene for rhodopsin, complete cds</Iteration_query-def> + <Iteration_query-len>1047</Iteration_query-len> +<Iteration_hits> +<Hit> + <Hit_num>1</Hit_num> + <Hit_id>Subject_3</Hit_id> + <Hit_def>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds</Hit_def> + <Hit_accession>Subject_3</Hit_accession> + <Hit_len>1213</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>1208.83</Hsp_bit-score> + <Hsp_score>654</Hsp_score> + <Hsp_evalue>0</Hsp_evalue> + <Hsp_query-from>1</Hsp_query-from> + <Hsp_query-to>1047</Hsp_query-to> + <Hsp_hit-from>88</Hsp_hit-from> + <Hsp_hit-to>1134</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_hit-frame>1</Hsp_hit-frame> + <Hsp_identity>917</Hsp_identity> 
+ <Hsp_positive>917</Hsp_positive> + <Hsp_gaps>2</Hsp_gaps> + <Hsp_align-len>1048</Hsp_align-len> + <Hsp_qseq>ATGAACGGGACCGAGGGCCCAAACTTCTACGTGCCTTTCTCCAACAAGACGGGCGTCGTACGCAGCCCCTTCGAGGCGCCGCAGTACTACCTGGCTGAGCCATGGCAGTTCAGCATGCTGGCCGCCTACATGTTCCTGCTGATCATGCTTGGCTTCCCCATCAACTTCCTCACGCTGTACGTCACAGTCCAGCACAAGAAGCTGAGGACCCCCCTCAACTACATCCTGCTCAACCTGGCCGTGGCAGATCTCTTCATGGTGTTCGGGGGCTTCACCACCACCCTGTATACCTCTCTGCACGGGTACTTCGTGTTCGGTCCGACGGGCTGCAACCTCGAGGGCTTCTTTGCCACCTTAGGCGGTGAAATTGCACTGTGGTCCTTGGTGGTGCTAGCCATCGAGCGGTACGTAGTGGTGTGCAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCACGCCATCATGGGCGTCGCATTCACCTGGGTCATGGCTCTGGCCTGTGCGGCCCCCCCCCTCGTCGGCTGGTCTAGATACATCCCGGAGGGGATGCAGTGCTCGTGCGGGATCGATTACTACACGCCCCACGAGGAGACCAACAATGAGTCGTTCGTCATCTACATGTTCGTTGTACACTTCATCATCCCCCTGATTGTCATATTCTTCTGCTACGGGCAGCTGGTCTTCACCGTCAAGGAGGCTGCAGCCCAGCAGCAGGAGTCGGCCACCACTCAGAAGGCCGAGAAGGAGGTCACGCGTATGGTCATCATCATGGTCATCGCTTTCCTCATATGCTGGCTGCCCTACGCAGGTGTGGCGTTCTACATCTTCACCCATCAGGGATCCGACTTTGGCCCCATCTTCATGACCATCCCGGCTTTCTTTGCCAAGA-CGTCTGCCGTCTATAACCCCGTCATCTACATCATGATGAACAAGCAGTTCCGGAACTGCATGGTCACCACTCTCTGCTGTGGCAAGAACCCCCTAGGTGACGACGAGGCCTCCACGACCGTGTCCAAGACAGAGACCAGCCAAGTGGCCCCTGCCTAA</Hsp_qseq> + 
<Hsp_hseq>ATGAATGGCACAGAAGGCCCTAACTTCTACGTGCCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTGGCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGCTTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCTCTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTCACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAATTTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTGGCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAACCATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCACTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTACTACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCACTTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAGGAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACCCGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTGGCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCAGCGTTCTTTGCCAAGAGCGCC-GCCATCTACAACCCTGTCATCTATATCATGATGAACAAGCAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGATGAGGCCTCTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAA</Hsp_hseq> + <Hsp_midline>||||| || || || ||||| |||||||||||||| |||||||| |||||| || |||||||||||||||||| || |||||||||||||||||||||||||||||| ||||||||||||||||||||| ||||||||| |||| |||||||||||||||||||||||||| |||||||| |||||||||||||||||| | || || ||||||||||||||||||||||| |||||||| || ||||||||||| | || |||||||||| |||||| || ||||||||||| || |||||||| ||||| || || || ||||| | |||||||||||||||||| | |||||||||||||| ||||||||||||||||| || ||||||||||||||||| |||||||| ||||||||||||||||||||||||||||||||||| |||||||||||||| || ||||||||||||||||| |||||||| || || ||||| |||| ||||||||| || |||||||| ||||| ||||||||||||| || ||||| |||||||||| | | |||| |||||| ||||| || ||||||||||||||||| || ||||||| ||||||| ||||| |||| || |||||||| |||||||| |||||||||||||||||||| || ||||||||||||||||| |||||||| |||||||| |||||||||||||| || ||||||||||||||||||||||||||||| || |||||| |||||||||| | ||||| ||||||||||||||||| ||||| ||| |||| || |||||||||||||||||||| || ||||||||||||| || | ||| |||| ||||| |||||||| 
||||||||||||||||||||||||||||||||| ||||||| ||||||| ||||||||||| || |||||||| |||||||| | |||||||||||||| ||||| ||||| |||||||| ||||||</Hsp_midline> + </Hsp> + </Hit_hsps> +</Hit> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>4933992</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> +</Iteration> +<Iteration> + <Iteration_iter-num>16</Iteration_iter-num> + <Iteration_query-ID>Query_6</Iteration_query-ID> + <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def> + <Iteration_query-len>1344</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>6353949</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>17</Iteration_iter-num> + <Iteration_query-ID>Query_6</Iteration_query-ID> + <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def> + <Iteration_query-len>1344</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>6353949</Statistics_eff-space> + 
<Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +<Iteration> + <Iteration_iter-num>18</Iteration_iter-num> + <Iteration_query-ID>Query_6</Iteration_query-ID> + <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def> + <Iteration_query-len>1344</Iteration_query-len> +<Iteration_hits> +</Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>0</Statistics_db-num> + <Statistics_db-len>0</Statistics_db-len> + <Statistics_hsp-len>15</Statistics_hsp-len> + <Statistics_eff-space>6353949</Statistics_eff-space> + <Statistics_kappa>0.46</Statistics_kappa> + <Statistics_lambda>1.28</Statistics_lambda> + <Statistics_entropy>0.85</Statistics_entropy> + </Statistics> + </Iteration_stat> + <Iteration_message>No hits found</Iteration_message> +</Iteration> +</BlastOutput_iterations> +</BlastOutput> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastn_rhodopsin_vs_three_human_converted.tabular Fri Mar 14 07:40:46 2014 -0400 @@ -0,0 +1,7 @@ +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 4e-132 460 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 3e-93 331 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 3e-73 265 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 3e-68 248 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208
--- a/test-data/blastp_four_human_vs_rhodopsin.tabular Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/blastp_four_human_vs_rhodopsin.tabular Fri Mar 14 07:40:46 2014 -0400 @@ -1,6 +1,6 @@ -sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 -sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 -sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 -sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 -sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 -sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 +sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 +sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 +sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 +sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 +sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 +sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599
--- a/test-data/blastp_four_human_vs_rhodopsin.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/blastp_four_human_vs_rhodopsin.xml Fri Mar 14 07:40:46 2014 -0400 @@ -2,7 +2,7 @@ <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"> <BlastOutput> <BlastOutput_program>blastp</BlastOutput_program> - <BlastOutput_version>BLASTP 2.2.28+</BlastOutput_version> + <BlastOutput_version>BLASTP 2.2.29+</BlastOutput_version> <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference> <BlastOutput_db></BlastOutput_db> <BlastOutput_query-ID>sp|Q9BS26|ERP44_HUMAN</BlastOutput_query-ID>
--- a/test-data/blastp_four_human_vs_rhodopsin_ext.tabular Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/blastp_four_human_vs_rhodopsin_ext.tabular Fri Mar 14 07:40:46 2014 -0400 @@ -1,6 +1,6 @@ -sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 N/A -sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 N/A -sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 
gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A -sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A -sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 N/A -sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 N/A +sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 N/A +sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0 619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 N/A +sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 
VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A +sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A +sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 
MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 N/A +sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 N/A
--- a/test-data/blastp_rhodopsin_vs_four_human.tabular Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/blastp_rhodopsin_vs_four_human.tabular Fri Mar 14 07:40:46 2014 -0400 @@ -1,6 +1,6 @@ -gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 348 1 348 0.0 679 -gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 83.33 354 53 2 1 354 1 348 0.0 605 -gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 94.82 328 17 0 1 328 11 338 0.0 630 -gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 94.82 328 17 0 1 328 11 338 0.0 630 -gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 93.10 348 23 1 1 347 1 348 0.0 651 -gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 81.09 349 65 1 1 349 1 348 0.0 587 +gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 348 1 348 0.0 701 +gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 83.33 354 53 2 1 354 1 348 0.0 605 +gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 94.82 328 17 0 1 328 11 338 0.0 630 +gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 94.82 328 17 0 1 328 11 338 0.0 630 +gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 93.10 348 23 1 1 347 1 348 0.0 651 +gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 81.09 349 65 1 1 349 1 348 0.0 587
--- a/test-data/blastx_rhodopsin_vs_four_human.tabular Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/blastx_rhodopsin_vs_four_human.tabular Fri Mar 14 07:40:46 2014 -0400 @@ -1,10 +1,10 @@ -gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 -gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 -gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 -gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 -gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532
--- a/test-data/blastx_rhodopsin_vs_four_human.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/blastx_rhodopsin_vs_four_human.xml Fri Mar 14 07:40:46 2014 -0400 @@ -2,7 +2,7 @@ <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"> <BlastOutput> <BlastOutput_program>blastx</BlastOutput_program> - <BlastOutput_version>BLASTX 2.2.28+</BlastOutput_version> + <BlastOutput_version>BLASTX 2.2.29+</BlastOutput_version> <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference> <BlastOutput_db></BlastOutput_db> <BlastOutput_query-ID>Query_1</BlastOutput_query-ID> @@ -307,9 +307,9 @@ <Hit_hsps> <Hsp> <Hsp_num>1</Hsp_num> - <Hsp_bit-score>220.32</Hsp_bit-score> - <Hsp_score>560</Hsp_score> - <Hsp_evalue>4.29169e-67</Hsp_evalue> + <Hsp_bit-score>220.705</Hsp_bit-score> + <Hsp_score>561</Hsp_score> + <Hsp_evalue>3.21377e-67</Hsp_evalue> <Hsp_query-from>1</Hsp_query-from> <Hsp_query-to>333</Hsp_query-to> <Hsp_hit-from>11</Hsp_hit-from> @@ -345,9 +345,9 @@ </Hsp> <Hsp> <Hsp_num>3</Hsp_num> - <Hsp_bit-score>121.324</Hsp_bit-score> - <Hsp_score>303</Hsp_score> - <Hsp_evalue>1.96633e-33</Hsp_evalue> + <Hsp_bit-score>121.709</Hsp_bit-score> + <Hsp_score>304</Hsp_score> + <Hsp_evalue>1.62516e-33</Hsp_evalue> <Hsp_query-from>2855</Hsp_query-from> <Hsp_query-to>3031</Hsp_query-to> <Hsp_hit-from>177</Hsp_hit-from>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastx_rhodopsin_vs_four_human_all.tabular Fri Mar 14 07:40:46 2014 -0400 @@ -0,0 +1,10 @@ +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A 0 gi|57163782|ref|NM_001009242.1| gi|57163782|ref|NM_001009242.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 15KA71FL4TS79VA9ML34VI71LI7SASA21LI13TA1GV11 99 99 N/A N/A N/A N/A N/A +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 N/A 0 gi|2734705|gb|U59921.1|BBU59921 gi|2734705|gb|U59921.1|BBU59921 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 10IV1MF2KA20YF1IM1CA7LV7ML5IV17FV1NDHL4CG3VS1ML1ST1MLNH3IVLF1AP3YNVL36SG4VI7IV3SA2VA3LA9ML5VI26LM8RQ2CF30VIFA13FY2STNH3EN7VI7SASA9LM8IL2LI6FL1ED1DE 63 63 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 5KA9HY61FL4TS28 22 8 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 18LV3GS19LI7SASA13 22 5 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 
0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 2/0 6ML34VI14VARASA 22 4 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 1AG36LV1LF13VA4 22 4 N/A N/A N/A N/A N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 9LI13TA1AV 22 2 N/A N/A N/A N/A N/A +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 N/A 0 gi|283855822|gb|GQ290312.1| gi|283855822|gb|GQ290312.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 5KA66ND4FL4TS5ML7AP49LV25ML34VI40VI6LV19VI11SASA21LI13TA1 99 99 N/A N/A N/A N/A N/A +gi|18148870|dbj|AB062417.1| 
sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A 0 gi|18148870|dbj|AB062417.1| gi|18148870|dbj|AB062417.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 15KA9AY22MV38FL4TS79VA9ML10PLHKEP1TV14IT2LM1VI47LV3GS11DN14TSSA1VI17VL2LI13TA13 99 99 N/A N/A N/A N/A N/A +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 N/A 0 gi|12583664|dbj|AB043817.1| gi|12583664|dbj|AB043817.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A 
N/A 2/0 10IV1MF22AQ2AM1SA4FL2IVAL12IVEQ23FL4TS1ML3ML12IL14CS8WYMV5VMTS6SN6MAVF2TV6LA3FA20RLAK1GEIV10TVCV2ST2LMAIVI1SF4RQ2CF12EA3RK2RK6VI4SA2VI9WF8TN9SA5SA1LI3MV3CM6HN2IL2LI6FLEGEDEDDEGA 74 74 N/A N/A N/A N/A N/A
--- a/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Fri Mar 14 07:40:46 2014 -0400 @@ -1,6 +1,6 @@ gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1
--- a/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Fri Mar 14 07:40:46 2014 -0400 @@ -1,8 +1,8 @@ gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 -gi|283855845|gb|GQ290303.1| 
sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 sp|P08100|OPSD_HUMAN 560 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 303 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 Rhodopsin OS=Homo 
sapiens GN=RHO PE=1 SV=1 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
--- a/test-data/blastx_rhodopsin_vs_four_human_ext.tabular Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/blastx_rhodopsin_vs_four_human_ext.tabular Fri Mar 14 07:40:46 2014 -0400 @@ -1,10 +1,10 @@ -gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A -gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 N/A -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 sp|P08100|OPSD_HUMAN 560 107 109 0 98.20 1 0 
VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 N/A -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 N/A -gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 303 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 N/A +gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A +gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 
MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 N/A +gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 N/A gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 N/A gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 
QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 N/A -gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 N/A -gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A -gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 
MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 N/A +gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 N/A +gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A +gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 N/A
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/convert2blastmask_four_human_masked.maskinfo-asn1 Fri Mar 14 07:40:46 2014 -0400 @@ -0,0 +1,158 @@ +Blast-db-mask-info ::= { + algo-id 0, + algo-program seg, + algo-options "window=12; locut=2.2; hicut=2.5", + masks { + masks { + int { + from 6, + to 18, + id swissprot { + name "ERP44_HUMAN", + accession "Q9BS26", + release "reviewed" + } + }, + packed-int { + { + from 11, + to 46, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 325, + to 332, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 421, + to 496, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 501, + to 516, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 536, + to 558, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 636, + to 648, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 737, + to 762, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 789, + to 806, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 970, + to 983, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + }, + { + from 999, + to 1010, + id swissprot { + name "BMP2K_HUMAN", + accession "Q9NSY1", + release "reviewed" + } + } + }, + packed-int { + { + from 3, + to 26, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + }, + { + from 372, + to 390, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + }, + { + from 766, + to 791, + id swissprot { + name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + }, + { + from 1312, + to 1324, + id swissprot { 
+ name "INSR_HUMAN", + accession "P06213", + release "reviewed" + } + } + }, + int { + from 230, + to 246, + id swissprot { + name "OPSD_HUMAN", + accession "P08100", + release "reviewed" + } + } + }, + more FALSE + } +}
--- a/test-data/four_human_proteins.fasta.log Tue Jan 21 13:37:01 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ - - -Building a new DB, current time: 11/21/2013 11:16:27 -New DB name: /tmp/tmpnSjpCP/tmpwAbNo4/database/files/000/dataset_2_files/blastdb -New DB title: Just 4 human proteins -Sequence type: Protein -Keep Linkouts: T -Keep MBits: T -Maximum file size: 1000000000B -Adding sequences from FASTA; added 4 sequences in 0.00202417 seconds.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins.fasta.log.txt Fri Mar 14 07:40:46 2014 -0400 @@ -0,0 +1,5 @@ +New DB title: Just 4 human proteins +Sequence type: Protein +Keep Linkouts: T +Keep MBits: T +Maximum file size: 1000000000B
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_masked.fasta Fri Mar 14 07:40:46 2014 -0400 @@ -0,0 +1,61 @@ +>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 +MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF +SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK +REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER +VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK +CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD +CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF +HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL +>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 +MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG +GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS +DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD +LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG +KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP +DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT +IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE +Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq +qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy +qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV +ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD +KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD +QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE +NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA +QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK +APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD +EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR +HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS +WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ +SQQSQPVELDPFGAAPFPSKQ +>sp|P06213|INSR_HUMAN Insulin receptor 
OS=Homo sapiens GN=INSR PE=1 SV=4 +MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL +QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL +VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE +ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL +GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG +CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC +TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL +EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE +RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ +NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS +DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE +RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL +KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf +pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV +SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV +SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG +PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR +EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG +FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA +AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV +RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN +CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme +fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN +PS +>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY +VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG +GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP +EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes +attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI +YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_taxid.fasta.log.txt Fri Mar 14 07:40:46 2014 -0400 @@ -0,0 +1,5 @@ +New DB title: Just 4 human proteins +Sequence type: Protein +Keep Linkouts: T +Keep MBits: T +Maximum file size: 1000000000B
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_taxid.fasta.phd Fri Mar 14 07:40:46 2014 -0400 @@ -0,0 +1,4 @@ +11117184492 +29249033410 +36665887501 +5392473183
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/four_human_proteins_taxid.fasta.psd Fri Mar 14 07:40:46 2014 -0400 @@ -0,0 +1,4 @@ +gnl|bl_ord_id|00 +gnl|bl_ord_id|11 +gnl|bl_ord_id|22 +gnl|bl_ord_id|33
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/segmasker_four_human.fasta Fri Mar 14 07:40:46 2014 -0400 @@ -0,0 +1,61 @@ +>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 +MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF +SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK +REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER +VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK +CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD +CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF +HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL +>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 +MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG +GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS +DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD +LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG +KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP +DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT +IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE +Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq +qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy +qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV +ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD +KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD +QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE +NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA +QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK +APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD +EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR +HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS +WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ +SQQSQPVELDPFGAAPFPSKQ +>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo 
sapiens GN=INSR PE=1 SV=4 +MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL +QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL +VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE +ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL +GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG +CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC +TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL +EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE +RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ +NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS +DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE +RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL +KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf +pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV +SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV +SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG +PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR +EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG +FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA +AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV +RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN +CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme +fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN +PS +>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 +MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY +VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG +GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP +EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes +attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI +YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/segmasker_four_human.maskinfo-asn1 Fri Mar 14 07:40:46 2014 -0400 @@ -0,0 +1,114 @@ +Blast-db-mask-info ::= { + algo-id 1, + algo-program seg, + algo-options "window=12; locut=2.2; hicut=2.5", + masks { + masks { + int { + from 6, + to 18, + id local id 1 + }, + packed-int { + { + from 11, + to 46, + id local id 2 + }, + { + from 325, + to 332, + id local id 2 + }, + { + from 421, + to 443, + id local id 2 + }, + { + from 437, + to 450, + id local id 2 + }, + { + from 447, + to 496, + id local id 2 + }, + { + from 501, + to 516, + id local id 2 + }, + { + from 536, + to 554, + id local id 2 + }, + { + from 545, + to 558, + id local id 2 + }, + { + from 636, + to 648, + id local id 2 + }, + { + from 737, + to 762, + id local id 2 + }, + { + from 789, + to 806, + id local id 2 + }, + { + from 970, + to 983, + id local id 2 + }, + { + from 999, + to 1010, + id local id 2 + } + }, + packed-int { + { + from 3, + to 26, + id local id 3 + }, + { + from 372, + to 390, + id local id 3 + }, + { + from 766, + to 782, + id local id 3 + }, + { + from 780, + to 791, + id local id 3 + }, + { + from 1312, + to 1324, + id local id 3 + } + }, + int { + from 230, + to 246, + id local id 4 + } + }, + more FALSE + } +}
--- a/test-data/tblastn_four_human_vs_rhodopsin.html Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/tblastn_four_human_vs_rhodopsin.html Fri Mar 14 07:40:46 2014 -0400 @@ -3,7 +3,7 @@ <BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099"> <PRE> -<b>TBLASTN 2.2.28+</b> +<b>TBLASTN 2.2.29+</b> <b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 @@ -461,8 +461,8 @@ Length=1047 -<script src="blastResult.js"></script> - Score = 732 bits (1689), Expect = 0.0, Method: Compositional matrix adjust. + + Score = 732 bits (1689), Expect = 0.0, Method: Compositional matrix adjust. Identities = 336/348 (97%), Positives = 343/348 (99%), Gaps = 0/348 (0%) Frame = +1 @@ -511,8 +511,8 @@ Length=1574 -<script src="blastResult.js"></script> - Score = 646 bits (1489), Expect = 0.0, Method: Compositional matrix adjust. + + Score = 646 bits (1489), Expect = 0.0, Method: Compositional matrix adjust. Identities = 290/342 (85%), Positives = 320/342 (94%), Gaps = 1/342 (0%) Frame = +3 @@ -561,8 +561,8 @@ Length=4301 -<script src="blastResult.js"></script> - Score = 151 bits (342), Expect(2) = 1e-72, Method: Compositional matrix adjust. + + Score = 151 bits (342), Expect(2) = 1e-72, Method: Compositional matrix adjust. Identities = 69/74 (93%), Positives = 73/74 (99%), Gaps = 0/74 (0%) Frame = +3 @@ -575,7 +575,7 @@ Sbjct 3327 SIYNPVIYIMMNKQ 3368 - Score = 126 bits (284), Expect(2) = 1e-72, Method: Compositional matrix adjust. + Score = 126 bits (284), Expect(2) = 1e-72, Method: Compositional matrix adjust. Identities = 54/59 (92%), Positives = 57/59 (97%), Gaps = 0/59 (0%) Frame = +2 @@ -584,7 +584,7 @@ Sbjct 2855 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS 3031 - Score = 229 bits (523), Expect = 9e-67, Method: Compositional matrix adjust. + Score = 229 bits (523), Expect = 9e-67, Method: Compositional matrix adjust. 
Identities = 107/111 (96%), Positives = 109/111 (98%), Gaps = 0/111 (0%) Frame = +1 @@ -597,7 +597,7 @@ Sbjct 181 PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG 333 - Score = 122 bits (276), Expect = 1e-32, Method: Compositional matrix adjust. + Score = 122 bits (276), Expect = 1e-32, Method: Compositional matrix adjust. Identities = 55/59 (93%), Positives = 56/59 (95%), Gaps = 0/59 (0%) Frame = +3 @@ -635,8 +635,8 @@ Length=983 -<script src="blastResult.js"></script> - Score = 658 bits (1517), Expect = 0.0, Method: Compositional matrix adjust. + + Score = 658 bits (1517), Expect = 0.0, Method: Compositional matrix adjust. Identities = 310/326 (95%), Positives = 322/326 (99%), Gaps = 0/326 (0%) Frame = +1 @@ -685,8 +685,8 @@ Length=1047 -<script src="blastResult.js"></script> - Score = 711 bits (1640), Expect = 0.0, Method: Compositional matrix adjust. + + Score = 711 bits (1640), Expect = 0.0, Method: Compositional matrix adjust. Identities = 325/348 (93%), Positives = 337/348 (97%), Gaps = 0/348 (0%) Frame = +1 @@ -735,8 +735,8 @@ Length=1344 -<script src="blastResult.js"></script> - Score = 626 bits (1444), Expect = 0.0, Method: Compositional matrix adjust. + + Score = 626 bits (1444), Expect = 0.0, Method: Compositional matrix adjust. Identities = 281/342 (82%), Positives = 311/342 (91%), Gaps = 1/342 (0%) Frame = +2
--- a/test-data/tblastn_four_human_vs_rhodopsin.tabular Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/tblastn_four_human_vs_rhodopsin.tabular Fri Mar 14 07:40:46 2014 -0400 @@ -1,10 +1,10 @@ -sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0 732 -sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0 646 -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72 151 -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72 126 -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67 229 -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32 122 +sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0 732 +sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0 646 +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72 151 +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72 126 +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67 229 +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32 122 sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.46 26 3 0 312 337 4222 4299 2e-12 57.7 -sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0 658 -sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0 711 -sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0 626 +sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0 658 +sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0 711 +sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0 626
--- a/test-data/tblastn_four_human_vs_rhodopsin.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/tblastn_four_human_vs_rhodopsin.xml Fri Mar 14 07:40:46 2014 -0400 @@ -2,7 +2,7 @@ <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"> <BlastOutput> <BlastOutput_program>tblastn</BlastOutput_program> - <BlastOutput_version>TBLASTN 2.2.28+</BlastOutput_version> + <BlastOutput_version>TBLASTN 2.2.29+</BlastOutput_version> <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference> <BlastOutput_db></BlastOutput_db> <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
--- a/test-data/tblastn_four_human_vs_rhodopsin_ext.tabular Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/tblastn_four_human_vs_rhodopsin_ext.tabular Fri Mar 14 07:40:46 2014 -0400 @@ -1,10 +1,10 @@ -sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0 732 gi|57163782|ref|NM_001009242.1| 1689 336 343 0 98.56 0 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 1047 N/A -sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0 646 gi|2734705|gb|U59921.1|BBU59921 1489 290 320 1 93.57 0 3 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 1574 N/A -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72 151 
gi|283855845|gb|GQ290303.1| 342 69 73 0 98.65 0 3 ESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ ESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ 348 4301 N/A -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72 126 gi|283855845|gb|GQ290303.1| 284 54 57 0 96.61 0 2 RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS 348 4301 N/A -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67 229 gi|283855845|gb|GQ290303.1| 523 107 109 0 98.20 0 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG 348 4301 N/A -sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32 122 gi|283855845|gb|GQ290303.1| 276 55 56 0 94.92 0 3 LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR 348 4301 N/A +sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0 732 gi|57163782|ref|NM_001009242.1| 1689 336 343 0 98.56 0 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 1047 N/A 
+sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0 646 gi|2734705|gb|U59921.1|BBU59921 1489 290 320 1 93.57 0 3 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 1574 N/A +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72 151 gi|283855845|gb|GQ290303.1| 342 69 73 0 98.65 0 3 ESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ ESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ 348 4301 N/A +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72 126 gi|283855845|gb|GQ290303.1| 284 54 57 0 96.61 0 2 RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS 348 4301 N/A +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67 229 gi|283855845|gb|GQ290303.1| 523 107 109 0 98.20 0 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG 348 4301 N/A +sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32 122 gi|283855845|gb|GQ290303.1| 276 55 56 0 94.92 0 3 
LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR 348 4301 N/A sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.46 26 3 0 312 337 4222 4299 2e-12 57.7 gi|283855845|gb|GQ290303.1| 125 23 24 0 92.31 0 1 QFRNCMLTTICCGKNPLGDDEASATV QFRNCMLTTLCCGKNPLGDDEASTTA 348 4301 N/A -sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0 658 gi|283855822|gb|GQ290312.1| 1517 310 322 0 98.77 0 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT 348 983 N/A -sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0 711 gi|18148870|dbj|AB062417.1| 1640 325 337 0 96.84 0 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 1047 N/A 
-sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0 626 gi|12583664|dbj|AB043817.1| 1444 281 311 1 90.94 0 2 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 1344 N/A +sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0 658 gi|283855822|gb|GQ290312.1| 1517 310 322 0 98.77 0 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT 348 983 N/A +sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0 711 gi|18148870|dbj|AB062417.1| 1640 325 337 0 96.84 0 1 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 1047 N/A +sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0 626 gi|12583664|dbj|AB043817.1| 1444 281 311 1 90.94 0 2 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 1344 N/A
--- a/test-data/tblastx_rhodopsin_vs_three_human.tabular Tue Jan 21 13:37:01 2014 -0500 +++ b/test-data/tblastx_rhodopsin_vs_three_human.tabular Fri Mar 14 07:40:46 2014 -0400 @@ -1,57 +1,57 @@ -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 97.39 230 6 0 1 690 88 777 0.0 559 -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 94.12 102 6 0 742 1047 829 1134 0.0 236 -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 91.22 148 13 0 1046 603 1133 690 0.0 308 -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 94.32 88 5 0 566 303 653 390 0.0 207 -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 96.34 82 3 0 248 3 335 90 0.0 182 -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 83.33 204 34 0 18 629 105 716 4e-158 404 -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 75.28 89 22 0 780 1046 867 1133 4e-158 161 -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 81.28 203 38 0 609 1 696 88 5e-153 360 -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 80.60 67 13 0 916 716 1003 803 5e-153 135 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 97.39 230 6 0 1 690 88 777 0.0 559 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 94.12 102 6 0 742 1047 829 1134 0.0 236 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 91.22 148 13 0 1046 603 1133 690 0.0 308 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 94.32 88 5 0 566 303 653 390 0.0 207 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 96.34 82 3 0 248 3 335 90 0.0 182 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 83.33 204 34 0 18 629 105 716 4e-158 404 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 75.28 89 22 0 780 1046 867 1133 4e-158 161 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 81.28 203 38 0 609 1 696 88 5e-153 360 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 80.60 67 13 0 916 716 1003 803 5e-153 135 gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 70.27 37 11 0 1047 
937 1134 1024 5e-153 64.2 gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 100.00 7 0 0 646 626 733 713 5e-153 24.0 -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 89.23 65 7 0 460 266 547 353 4e-105 167 -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 89.58 48 5 0 184 41 271 128 4e-105 104 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 89.23 65 7 0 460 266 547 353 4e-105 167 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 89.58 48 5 0 184 41 271 128 4e-105 104 gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 77.78 45 10 0 882 748 969 835 4e-105 93.9 gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 67.86 28 9 0 1045 962 1132 1049 4e-105 51.9 gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 59.09 22 9 0 586 521 673 608 4e-105 33.1 -gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 81.40 86 16 0 296 553 383 640 2e-87 185 +gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 81.40 86 16 0 296 553 383 640 2e-87 185 gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 84.38 32 5 0 11 106 98 193 2e-87 74.8 gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 71.43 35 10 0 941 1045 1028 1132 2e-87 61.6 gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 94.44 18 1 0 794 847 881 934 2e-87 50.1 -gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 83.61 238 39 0 18 731 64 777 0.0 507 -gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 82.35 85 15 0 783 1037 829 1083 0.0 188 -gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 70.96 303 88 0 925 17 971 63 2e-130 435 +gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 83.61 238 39 0 18 731 64 777 0.0 507 +gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 82.35 85 15 0 783 1037 829 1083 0.0 188 +gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 70.96 303 88 0 925 17 971 63 2e-130 435 gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 72.22 18 5 0 1027 974 1073 1020 2e-130 35.0 -gi|2734705|gb|U59921.1|BBU59921 
ENA|BC112106|BC112106.1 55.32 188 84 0 605 42 651 88 7e-89 245 +gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 55.32 188 84 0 605 42 651 88 7e-89 245 gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 61.11 72 28 0 1037 822 1083 868 7e-89 91.3 -gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 49.02 204 104 0 29 640 75 686 4e-78 197 +gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 49.02 204 104 0 29 640 75 686 4e-78 197 gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 66.04 53 18 0 860 1018 906 1064 4e-78 85.8 gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 44.44 27 15 0 689 769 735 815 4e-78 32.2 -gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 47.47 198 104 0 633 40 679 86 4e-65 177 +gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 47.47 198 104 0 633 40 679 86 4e-65 177 gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 68.09 47 15 0 1017 877 1063 923 4e-65 80.3 -gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 57.89 114 48 0 265 606 311 652 3e-46 137 +gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 57.89 114 48 0 265 606 311 652 3e-46 137 gi|2734705|gb|U59921.1|BBU59921 ENA|BC112106|BC112106.1 46.30 54 29 0 19 180 65 226 3e-46 52.4 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 96.40 111 4 0 1 333 118 450 0.0 264 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.31 65 5 0 3174 3368 829 1023 0.0 151 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 96.43 56 2 0 2855 3022 616 783 0.0 141 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 93.22 59 4 0 1404 1580 442 618 0.0 138 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 96.40 111 4 0 1 333 118 450 0.0 264 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.31 65 5 0 3174 3368 829 1023 0.0 151 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 96.43 56 2 0 2855 3022 616 783 0.0 141 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 93.22 59 4 0 1404 1580 442 618 0.0 138 gi|283855845|gb|GQ290303.1| 
ENA|BC112106|BC112106.1 92.00 25 2 0 4222 4296 1021 1095 0.0 64.3 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 88.89 9 1 0 3128 3154 783 809 0.0 22.6 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 78.38 111 24 0 333 1 450 118 7e-171 212 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 88.75 80 9 0 3367 3128 1022 783 7e-171 161 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 93.33 60 4 0 1582 1403 620 441 7e-171 136 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.07 56 5 0 3021 2854 782 615 7e-171 119 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 78.38 111 24 0 333 1 450 118 7e-171 212 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 88.75 80 9 0 3367 3128 1022 783 7e-171 161 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 93.33 60 4 0 1582 1403 620 441 7e-171 136 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.07 56 5 0 3021 2854 782 615 7e-171 119 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 84.62 26 4 0 4301 4224 1100 1023 7e-171 52.8 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 95.83 72 3 0 218 3 335 120 8e-142 152 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 77.78 63 14 0 3368 3180 1023 835 8e-142 125 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 85.11 47 7 0 1544 1404 582 442 8e-142 108 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 80.36 56 11 0 3022 2855 783 616 8e-142 101 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 95.83 72 3 0 218 3 335 120 8e-142 152 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 77.78 63 14 0 3368 3180 1023 835 8e-142 125 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 85.11 47 7 0 1544 1404 582 442 8e-142 108 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 80.36 56 11 0 3022 2855 783 616 8e-142 101 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 75.86 29 7 0 325 239 442 356 8e-142 58.3 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 86.36 22 3 0 4287 4222 1086 1021 8e-142 48.7 gi|283855845|gb|GQ290303.1| 
ENA|BC112106|BC112106.1 90.91 11 1 0 3159 3127 814 782 8e-142 31.3 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 81.03 58 11 0 2854 3027 615 788 2e-122 128 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 83.33 60 10 0 1403 1582 441 620 2e-122 125 -gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 80.60 67 13 0 3 203 120 320 2e-122 119 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 81.03 58 11 0 2854 3027 615 788 2e-122 128 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 83.33 60 10 0 1403 1582 441 620 2e-122 125 +gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 80.60 67 13 0 3 203 120 320 2e-122 119 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.30 23 2 0 4220 4288 1019 1087 2e-122 53.8 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 78.26 23 5 0 266 334 383 451 2e-122 48.3 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 80.00 20 4 0 3308 3367 963 1022 2e-122 46.0 @@ -68,16 +68,16 @@ gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 77.27 22 5 0 267 332 384 449 6e-43 45.1 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 86.36 22 3 0 4224 4289 1023 1088 6e-43 44.1 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 83.33 12 2 0 2856 2891 617 652 6e-43 25.4 -gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 95.91 220 9 0 1 660 118 777 0.0 526 -gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 92.13 89 7 0 712 978 829 1095 0.0 212 -gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.01 178 16 0 536 3 653 120 1e-178 353 -gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 88.32 137 16 0 983 573 1100 690 1e-178 277 -gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 77.43 319 72 0 3 959 120 1076 4e-174 593 -gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 79.07 129 27 0 558 172 675 289 2e-133 248 -gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 76.83 82 19 0 963 718 1080 835 2e-133 159 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 95.91 220 9 0 1 660 118 777 0.0 526 +gi|283855822|gb|GQ290312.1| 
ENA|BC112106|BC112106.1 92.13 89 7 0 712 978 829 1095 0.0 212 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.01 178 16 0 536 3 653 120 1e-178 353 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 88.32 137 16 0 983 573 1100 690 1e-178 277 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 77.43 319 72 0 3 959 120 1076 4e-174 593 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 79.07 129 27 0 558 172 675 289 2e-133 248 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 76.83 82 19 0 963 718 1080 835 2e-133 159 gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 84.09 44 7 0 133 2 250 119 2e-133 97.3 -gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 73.08 78 21 0 433 200 550 317 6e-102 145 -gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 70.15 67 20 0 799 599 916 716 6e-102 106 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 73.08 78 21 0 433 200 550 317 6e-102 145 +gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 70.15 67 20 0 799 599 916 716 6e-102 106 gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 80.49 41 8 0 123 1 240 118 6e-102 84.5 gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 77.78 27 6 0 553 473 670 590 6e-102 51.9 gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 71.43 14 4 0 889 848 1006 965 6e-102 32.7 @@ -87,16 +87,16 @@ gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 81.40 43 8 0 404 532 521 649 4e-48 47.3 gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 88.89 18 2 0 764 817 881 934 4e-48 44.6 gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 87.50 8 1 0 935 958 1052 1075 4e-48 21.7 -gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 93.91 230 14 0 1 690 88 777 0.0 538 -gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 91.18 102 9 0 742 1047 829 1134 0.0 233 -gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 88.83 188 21 0 566 3 653 90 0.0 394 -gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 84.06 138 22 0 1046 633 1133 720 0.0 260 -gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 
67.11 228 75 0 684 1 771 88 7e-132 333 -gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 67.27 110 36 0 1045 716 1132 803 7e-132 141 -gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 70.20 151 45 0 3 455 90 542 1e-128 236 -gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 64.04 89 32 0 780 1046 867 1133 1e-128 136 -gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 66.22 74 25 0 510 731 597 818 1e-128 111 -gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 66.04 106 36 0 242 559 329 646 2e-58 161 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 93.91 230 14 0 1 690 88 777 0.0 538 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 91.18 102 9 0 742 1047 829 1134 0.0 233 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 88.83 188 21 0 566 3 653 90 0.0 394 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 84.06 138 22 0 1046 633 1133 720 0.0 260 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 67.11 228 75 0 684 1 771 88 7e-132 333 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 67.27 110 36 0 1045 716 1132 803 7e-132 141 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 70.20 151 45 0 3 455 90 542 1e-128 236 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 64.04 89 32 0 780 1046 867 1133 1e-128 136 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 66.22 74 25 0 510 731 597 818 1e-128 111 +gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 66.04 106 36 0 242 559 329 646 2e-58 161 gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 85.71 21 3 0 92 154 179 241 2e-58 53.8 gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 73.68 19 5 0 791 847 878 934 2e-58 39.1 gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 61.29 62 24 0 424 239 511 326 4e-55 81.3 @@ -104,11 +104,11 @@ gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 65.71 35 12 0 882 778 969 865 4e-55 56.3 gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 58.14 43 18 0 649 521 736 608 4e-55 50.6 gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 66.67 12 4 0 972 937 
1059 1024 4e-55 23.9 -gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 82.13 235 42 0 11 715 76 780 0.0 498 -gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 78.31 83 18 0 770 1018 835 1083 0.0 177 -gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 72.29 332 92 0 1017 22 1082 87 1e-150 516 -gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 48.30 147 76 0 712 272 777 337 2e-98 169 -gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 54.17 72 33 0 1030 815 1095 880 2e-98 103 +gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 82.13 235 42 0 11 715 76 780 0.0 498 +gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 78.31 83 18 0 770 1018 835 1083 0.0 177 +gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 72.29 332 92 0 1017 22 1082 87 1e-150 516 +gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 48.30 147 76 0 712 272 777 337 2e-98 169 +gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 54.17 72 33 0 1030 815 1095 880 2e-98 103 gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 47.83 69 36 0 220 14 285 79 2e-98 83.5 gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 72.00 25 7 0 782 708 847 773 2e-98 45.1 gi|12583664|dbj|AB043817.1| ENA|BC112106|BC112106.1 56.00 75 33 0 532 756 597 821 5e-65 87.7
--- a/tools/ncbi_blast_plus/README.rst Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/README.rst Fri Mar 14 07:40:46 2014 -0400 @@ -1,9 +1,8 @@ Galaxy wrappers for NCBI BLAST+ suite ===================================== -These wrappers are copyright 2010-2013 by Peter Cock, The James Hutton Institute -(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. -See the licence text below. +These wrappers are copyright 2010-2013 by Peter Cock (The James Hutton Institute, +UK) and additional contributors. All rights reserved. See the licence text below. Currently tested with NCBI BLAST 2.2.28+ (i.e. version 2.2.28 of BLAST+), and does not work with the NCBI 'legacy' BLAST suite (e.g. ``blastall``). @@ -26,17 +25,7 @@ (``blastxml``) and protein and nucleotide BLAST databases (``blastdbp`` and ``blastdbn``). -You must tell Galaxy about any system level BLAST databases using configuration -files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein -databases like NR), and blastdb_d.loc (protein domain databases like CDD or -SMART) which are located in the tool-data/ folder. Sample files are included -which explain the tab-based format to use. - -You can download the NCBI provided databases as tar-balls from here: - -* ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR) -* ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD) - +See the configuration notes below. Manual Installation =================== @@ -79,6 +68,39 @@ ./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools +Configuration +============= + +You must tell Galaxy about any system level BLAST databases using configuration +files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein +databases like NR), and blastdb_d.loc (protein domain databases like CDD or +SMART) which are located in the tool-data/ folder. Sample files are included +which explain the tab-based format to use. 
+ +You can download the NCBI provided databases as tar-balls from here: + +* ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR) +* ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD) + +If using the optional taxonomy columns, you will also need to download the +NCBI taxonomy files (``taxdb.btd`` and ``taxdb.bti`` from ``taxdb.tar.gz`` on +the BLAST database FTP site). Currently explicit version tracking of the +taxonomy is not supported, and in order to use this you must set the +``$BLASTDB`` environment variable to include the path where you unzipped the +taxonomy files. If this is not done, the taxonomy columns like species name +will appear as ``N/A`` in the tabular output. + +The BLAST+ binaries support multi-threaded operation, which is handled via the +$GALAXY_SLOTS environment variable. This should be set automatically by Galaxy +via your job runner settings, which allows you to (for example) allocate four +cores to each BLAST job. + +In addition, the BLAST+ wrappers also support high level parallelism by task +splitting if ``use_tasked_jobs = True`` is enabled in your ``universe_wsgi.ini`` +configuration file. Essentially, the FASTA input query files are broken up into +batches of 1000 sequences, a separate BLAST child job is run for each chunk, +and then the BLAST output files are merged (in order). This is transparent +for the end user. History ======= @@ -106,7 +128,7 @@ (all too often our users were having to re-run searches just to get one of the missing columns like query or subject length) v0.0.18 - Defensive quoting of filenames in case of spaces (where possible, - BLAST+ handling of some mult-file arguments is problematic). + BLAST+ handling of some multi-file arguments is problematic). v0.0.19 - Added wrappers for rpsblast and rpstblastn, and new blastdb_d.loc for the domain databases they use (e.g. CDD, PFAM or SMART). 
- Correct case of exception regular expression (for error handling @@ -122,20 +144,30 @@ - Development moved to GitHub, https://github.com/peterjc/galaxy_blast - Updated citation information (Cock et al. 2013). v0.0.21 - Use macros to simplify the XML wrappers. - - Added wrapper for dustmasker - - Enabled masking for makeblastdb - - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes + - Added wrapper for dustmasker. + - Enabled masking for makeblastdb. + - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes defined in updated blast_datatypes on Galaxy ToolShed. - - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26 - - Now depends on package_blast_plus_2_2_27 in ToolShed -v0.0.22 - More use macros to simplify the wrappers - - Set number of threads via $GALAXY_SLOTS environment variable - - More descriptive default output names - - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18) + - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26. + - Now depends on package_blast_plus_2_2_27 in ToolShed. +v0.0.22 - More use macros to simplify the wrappers. + - Set number of threads via $GALAXY_SLOTS environment variable. + - More descriptive default output names. + - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18). - Pre-check for duplicate identifiers in makeblastdb wrapper. - - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27 - - Now depends on package_blast_plus_2_2_28 in ToolShed + - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27. + - Now depends on package_blast_plus_2_2_28 in ToolShed. - Extended tabular output includes 'salltitles' as column 25. +v0.1.00 - Now depends on package_blast_plus_2_2_29 in ToolShed. + - Tabular output now includes option to pick specific columns, + including previously unavailable taxonomy columns. + - BLAST XML to tabular tool supports multiple input files. + - More detailed descriptions for BLASTN and BLASTP task option. 
+ - Wrappers for segmasker, dustmasker and convert2blastmask. + - Supports using maskinfo with makeblastdb wrapper. + - Supports setting a taxonomy ID in makeblastdb wrapper. + - Subtle changes like new conditional settings will require some old + workflows be updated to cope. ======= ======================================================================
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Fri Mar 14 07:40:46 2014 -0400 @@ -62,9 +62,11 @@ """ import sys import re +import os +from optparse import OptionParser if "-v" in sys.argv or "--version" in sys.argv: - print "v0.0.22" + print "v0.1.00" sys.exit(0) if sys.version_info[:2] >= ( 2, 5 ): @@ -81,34 +83,55 @@ sys.stderr.write("%s\n" % msg) sys.exit(1) -#Parse Command Line -try: - in_file, out_file, out_fmt = sys.argv[1:] -except: - stop_err("Expect 3 arguments: input BLAST XML file, output tabular file, out format (std or ext)") +if len(sys.argv) == 4 and sys.argv[3] in ["std", "x22", "ext"]: + #False positive if user really has a BLAST XML file called 'std' or 'ext'... + stop_err("ERROR: The script API has changed, sorry.") + +usage = """usage: %prog [options] blastxml[,...] + +Convert one (or more) BLAST XML files into a single tabular file. +The columns option can be 'std' (standard 12 columns), 'ext' +(extended 25 columns), or a list of BLAST+ column names like +'qseqid,sseqid,pident' (space or comma separated). +""" +parser = OptionParser(usage=usage) +parser.add_option('-o', '--output', dest='output', default=None, help='output filename (defaults to stdout)', metavar="FILE") +parser.add_option("-c", "--columns", dest="columns", default='std', help="[std|ext|col1,col2,...] 
standard 12 columns, extended 25 columns, or list of column names") +(options, args) = parser.parse_args() + +colnames = 'qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles'.split(',') + +if len(args) < 1: + stop_err("ERROR: No BLASTXML input files given; run with --help to see options.") + +out_fmt = options.columns if out_fmt == "std": extended = False + cols = None elif out_fmt == "x22": stop_err("Format argument x22 has been replaced with ext (extended 25 columns)") elif out_fmt == "ext": extended = True + cols = None else: - stop_err("Format argument should be std (12 column) or ext (extended 25 columns), not: %r" % out_fmt) - + cols = out_fmt.replace(" ", ",").split(",") #Allow space or comma separated + #Remove any blank entries due to trailing comma, + #or annoying "None" dummy value from Galaxy if no columns + cols = [c for c in cols if c and c != "None"] + extra = set(cols).difference(colnames) + if extra: + stop_err("These are not recognised column names: %s" % ",".join(sorted(extra))) + del extra + assert set(colnames).issuperset(cols), cols + if not cols: + stop_err("No columns selected!") + extended = max(colnames.index(c) for c in cols) >= 12 #Do we need any higher columns? +del out_fmt -# get an iterable -try: - context = ElementTree.iterparse(in_file, events=("start", "end")) -except: - stop_err("Invalid data format.") -# turn it into an iterator -context = iter(context) -# get the root element -try: - event, root = context.next() -except: - stop_err( "Invalid data format." 
) +for in_file in args: + if not os.path.isfile(in_file): + stop_err("Input BLAST XML file not found: %s" % in_file) re_default_query_id = re.compile("^Query_\d+$") @@ -122,156 +145,187 @@ assert not re_default_subject_id.match("TheSubject_1") -outfile = open(out_file, 'w') -blast_program = None -for event, elem in context: - if event == "end" and elem.tag == "BlastOutput_program": - blast_program = elem.text - # for every <Iteration> tag - if event == "end" and elem.tag == "Iteration": - #Expecting either this, from BLAST 2.2.25+ using FASTA vs FASTA - # <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> - # <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> - # <Iteration_query-len>406</Iteration_query-len> - # <Iteration_hits></Iteration_hits> - # - #Or, from BLAST 2.2.24+ run online - # <Iteration_query-ID>Query_1</Iteration_query-ID> - # <Iteration_query-def>Sample</Iteration_query-def> - # <Iteration_query-len>516</Iteration_query-len> - # <Iteration_hits>... 
- qseqid = elem.findtext("Iteration_query-ID") - if re_default_query_id.match(qseqid): - #Place holder ID, take the first word of the query definition - qseqid = elem.findtext("Iteration_query-def").split(None,1)[0] - qlen = int(elem.findtext("Iteration_query-len")) - - # for every <Hit> within <Iteration> - for hit in elem.findall("Iteration_hits/Hit"): - #Expecting either this, - # <Hit_id>gi|3024260|sp|P56514.1|OPSD_BUFBU</Hit_id> - # <Hit_def>RecName: Full=Rhodopsin</Hit_def> - # <Hit_accession>P56514</Hit_accession> - #or, - # <Hit_id>Subject_1</Hit_id> - # <Hit_def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</Hit_def> - # <Hit_accession>Subject_1</Hit_accession> +def convert(blastxml_filename, output_handle): + blast_program = None + # get an iterable + try: + context = ElementTree.iterparse(in_file, events=("start", "end")) + except: + stop_err("Invalid data format.") + # turn it into an iterator + context = iter(context) + # get the root element + try: + event, root = context.next() + except: + stop_err( "Invalid data format." 
) + for event, elem in context: + if event == "end" and elem.tag == "BlastOutput_program": + blast_program = elem.text + # for every <Iteration> tag + if event == "end" and elem.tag == "Iteration": + #Expecting either this, from BLAST 2.2.25+ using FASTA vs FASTA + # <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID> + # <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def> + # <Iteration_query-len>406</Iteration_query-len> + # <Iteration_hits></Iteration_hits> # - #apparently depending on the parse_deflines switch - # - #Or, with BLAST 2.2.28+ can get this, - # <Hit_id>gnl|BL_ORD_ID|2</Hit_id> - # <Hit_def>chrIII gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence</Hit_def> - # <Hit_accession>2</Hit_accession> - sseqid = hit.findtext("Hit_id").split(None,1)[0] - hit_def = sseqid + " " + hit.findtext("Hit_def") - if re_default_subject_id.match(sseqid) \ - and sseqid == hit.findtext("Hit_accession"): - #Place holder ID, take the first word of the subject definition - hit_def = hit.findtext("Hit_def") - sseqid = hit_def.split(None,1)[0] - if sseqid.startswith("gnl|BL_ORD_ID|") \ - and sseqid == "gnl|BL_ORD_ID|" + hit.findtext("Hit_accession"): - #Alternative place holder ID, again take the first word of hit_def - hit_def = hit.findtext("Hit_def") - sseqid = hit_def.split(None,1)[0] - # for every <Hsp> within <Hit> - for hsp in hit.findall("Hit_hsps/Hsp"): - nident = hsp.findtext("Hsp_identity") - length = hsp.findtext("Hsp_align-len") - pident = "%0.2f" % (100*float(nident)/float(length)) + #Or, from BLAST 2.2.24+ run online + # <Iteration_query-ID>Query_1</Iteration_query-ID> + # <Iteration_query-def>Sample</Iteration_query-def> + # <Iteration_query-len>516</Iteration_query-len> + # <Iteration_hits>... 
+ qseqid = elem.findtext("Iteration_query-ID") + if re_default_query_id.match(qseqid): + #Place holder ID, take the first word of the query definition + qseqid = elem.findtext("Iteration_query-def").split(None,1)[0] + qlen = int(elem.findtext("Iteration_query-len")) - q_seq = hsp.findtext("Hsp_qseq") - h_seq = hsp.findtext("Hsp_hseq") - m_seq = hsp.findtext("Hsp_midline") - assert len(q_seq) == len(h_seq) == len(m_seq) == int(length) - gapopen = str(len(q_seq.replace('-', ' ').split())-1 + \ - len(h_seq.replace('-', ' ').split())-1) + # for every <Hit> within <Iteration> + for hit in elem.findall("Iteration_hits/Hit"): + #Expecting either this, + # <Hit_id>gi|3024260|sp|P56514.1|OPSD_BUFBU</Hit_id> + # <Hit_def>RecName: Full=Rhodopsin</Hit_def> + # <Hit_accession>P56514</Hit_accession> + #or, + # <Hit_id>Subject_1</Hit_id> + # <Hit_def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</Hit_def> + # <Hit_accession>Subject_1</Hit_accession> + # + #apparently depending on the parse_deflines switch + # + #Or, with a local database not using -parse_seqids can get this, + # <Hit_id>gnl|BL_ORD_ID|2</Hit_id> + # <Hit_def>chrIII gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence</Hit_def> + # <Hit_accession>2</Hit_accession> + sseqid = hit.findtext("Hit_id").split(None,1)[0] + hit_def = sseqid + " " + hit.findtext("Hit_def") + if re_default_subject_id.match(sseqid) \ + and sseqid == hit.findtext("Hit_accession"): + #Place holder ID, take the first word of the subject definition + hit_def = hit.findtext("Hit_def") + sseqid = hit_def.split(None,1)[0] + if sseqid.startswith("gnl|BL_ORD_ID|") \ + and sseqid == "gnl|BL_ORD_ID|" + hit.findtext("Hit_accession"): + #Alternative place holder ID, again take the first word of hit_def + hit_def = hit.findtext("Hit_def") + sseqid = hit_def.split(None,1)[0] + # for every <Hsp> within <Hit> + for hsp in hit.findall("Hit_hsps/Hsp"): + nident = hsp.findtext("Hsp_identity") + length = 
hsp.findtext("Hsp_align-len") + pident = "%0.2f" % (100*float(nident)/float(length)) + + q_seq = hsp.findtext("Hsp_qseq") + h_seq = hsp.findtext("Hsp_hseq") + m_seq = hsp.findtext("Hsp_midline") + assert len(q_seq) == len(h_seq) == len(m_seq) == int(length) + gapopen = str(len(q_seq.replace('-', ' ').split())-1 + \ + len(h_seq.replace('-', ' ').split())-1) + + mismatch = m_seq.count(' ') + m_seq.count('+') \ + - q_seq.count('-') - h_seq.count('-') + #TODO - Remove this alternative mismatch calculation and test + #once satisifed there are no problems + expected_mismatch = len(q_seq) \ + - sum(1 for q,h in zip(q_seq, h_seq) \ + if q == h or q == "-" or h == "-") + xx = sum(1 for q,h in zip(q_seq, h_seq) if q=="X" and h=="X") + if not (expected_mismatch - q_seq.count("X") <= int(mismatch) <= expected_mismatch + xx): + stop_err("%s vs %s mismatches, expected %i <= %i <= %i" \ + % (qseqid, sseqid, expected_mismatch - q_seq.count("X"), + int(mismatch), expected_mismatch)) - mismatch = m_seq.count(' ') + m_seq.count('+') \ - - q_seq.count('-') - h_seq.count('-') - #TODO - Remove this alternative mismatch calculation and test - #once satisifed there are no problems - expected_mismatch = len(q_seq) \ - - sum(1 for q,h in zip(q_seq, h_seq) \ - if q == h or q == "-" or h == "-") - xx = sum(1 for q,h in zip(q_seq, h_seq) if q=="X" and h=="X") - if not (expected_mismatch - q_seq.count("X") <= int(mismatch) <= expected_mismatch + xx): - stop_err("%s vs %s mismatches, expected %i <= %i <= %i" \ - % (qseqid, sseqid, expected_mismatch - q_seq.count("X"), - int(mismatch), expected_mismatch)) + #TODO - Remove this alternative identity calculation and test + #once satisifed there are no problems + expected_identity = sum(1 for q,h in zip(q_seq, h_seq) if q == h) + if not (expected_identity - xx <= int(nident) <= expected_identity + q_seq.count("X")): + stop_err("%s vs %s identities, expected %i <= %i <= %i" \ + % (qseqid, sseqid, expected_identity, int(nident), + expected_identity + 
q_seq.count("X"))) + - #TODO - Remove this alternative identity calculation and test - #once satisifed there are no problems - expected_identity = sum(1 for q,h in zip(q_seq, h_seq) if q == h) - if not (expected_identity - xx <= int(nident) <= expected_identity + q_seq.count("X")): - stop_err("%s vs %s identities, expected %i <= %i <= %i" \ - % (qseqid, sseqid, expected_identity, int(nident), - expected_identity + q_seq.count("X"))) + evalue = hsp.findtext("Hsp_evalue") + if evalue == "0": + evalue = "0.0" + else: + evalue = "%0.0e" % float(evalue) + bitscore = float(hsp.findtext("Hsp_bit-score")) + if bitscore < 100: + #Seems to show one decimal place for lower scores + bitscore = "%0.1f" % bitscore + else: + #Note BLAST does not round to nearest int, it truncates + bitscore = "%i" % bitscore - evalue = hsp.findtext("Hsp_evalue") - if evalue == "0": - evalue = "0.0" - else: - evalue = "%0.0e" % float(evalue) - - bitscore = float(hsp.findtext("Hsp_bit-score")) - if bitscore < 100: - #Seems to show one decimal place for lower scores - bitscore = "%0.1f" % bitscore - else: - #Note BLAST does not round to nearest int, it truncates - bitscore = "%i" % bitscore + values = [qseqid, + sseqid, + pident, + length, #hsp.findtext("Hsp_align-len") + str(mismatch), + gapopen, + hsp.findtext("Hsp_query-from"), #qstart, + hsp.findtext("Hsp_query-to"), #qend, + hsp.findtext("Hsp_hit-from"), #sstart, + hsp.findtext("Hsp_hit-to"), #send, + evalue, #hsp.findtext("Hsp_evalue") in scientific notation + bitscore, #hsp.findtext("Hsp_bit-score") rounded + ] - values = [qseqid, - sseqid, - pident, - length, #hsp.findtext("Hsp_align-len") - str(mismatch), - gapopen, - hsp.findtext("Hsp_query-from"), #qstart, - hsp.findtext("Hsp_query-to"), #qend, - hsp.findtext("Hsp_hit-from"), #sstart, - hsp.findtext("Hsp_hit-to"), #send, - evalue, #hsp.findtext("Hsp_evalue") in scientific notation - bitscore, #hsp.findtext("Hsp_bit-score") rounded - ] + if extended: + try: + sallseqid = 
";".join(name.split(None,1)[0] for name in hit_def.split(" >")) + salltitles = "<>".join(name.split(None,1)[1] for name in hit_def.split(" >")) + except IndexError as e: + stop_err("Problem splitting multuple hits?\n%r\n--> %s" % (hit_def, e)) + #print hit_def, "-->", sallseqid + positive = hsp.findtext("Hsp_positive") + ppos = "%0.2f" % (100*float(positive)/float(length)) + qframe = hsp.findtext("Hsp_query-frame") + sframe = hsp.findtext("Hsp_hit-frame") + if blast_program == "blastp": + #Probably a bug in BLASTP that they use 0 or 1 depending on format + if qframe == "0": qframe = "1" + if sframe == "0": sframe = "1" + slen = int(hit.findtext("Hit_len")) + values.extend([sallseqid, + hsp.findtext("Hsp_score"), #score, + nident, + positive, + hsp.findtext("Hsp_gaps"), #gaps, + ppos, + qframe, + sframe, + #NOTE - for blastp, XML shows original seq, tabular uses XXX masking + q_seq, + h_seq, + str(qlen), + str(slen), + salltitles, + ]) + if cols: + #Only a subset of the columns are needed + values = [values[colnames.index(c)] for c in cols] + #print "\t".join(values) + outfile.write("\t".join(values) + "\n") + # prevents ElementTree from growing large datastructure + root.clear() + elem.clear() - if extended: - try: - sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(" >")) - salltitles = "<>".join(name.split(None,1)[1] for name in hit_def.split(" >")) - except IndexError as e: - stop_err("Problem splitting multuple hits?\n%r\n--> %s" % (hit_def, e)) - #print hit_def, "-->", sallseqid - positive = hsp.findtext("Hsp_positive") - ppos = "%0.2f" % (100*float(positive)/float(length)) - qframe = hsp.findtext("Hsp_query-frame") - sframe = hsp.findtext("Hsp_hit-frame") - if blast_program == "blastp": - #Probably a bug in BLASTP that they use 0 or 1 depending on format - if qframe == "0": qframe = "1" - if sframe == "0": sframe = "1" - slen = int(hit.findtext("Hit_len")) - values.extend([sallseqid, - hsp.findtext("Hsp_score"), #score, - nident, - 
positive, - hsp.findtext("Hsp_gaps"), #gaps, - ppos, - qframe, - sframe, - #NOTE - for blastp, XML shows original seq, tabular uses XXX masking - q_seq, - h_seq, - str(qlen), - str(slen), - salltitles, - ]) - #print "\t".join(values) - outfile.write("\t".join(values) + "\n") - # prevents ElementTree from growing large datastructure - root.clear() - elem.clear() -outfile.close() + +if options.output: + outfile = open(options.output, "w") +else: + outfile = sys.stdout + +for in_file in args: + blast_program = None + convert(in_file, outfile) + +if options.output: + outfile.close() +else: + #Using stdout + pass +
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Fri Mar 14 07:40:46 2014 -0400 @@ -1,8 +1,15 @@ -<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.0.22"> +<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.1.00"> <description>Convert BLAST XML output to tabular</description> <version_command interpreter="python">blastxml_to_tabular.py --version</version_command> <command interpreter="python"> - blastxml_to_tabular.py $blastxml_file $tabular_file $out_format +blastxml_to_tabular.py -o "$tabular_file" +#if $output.out_format == "cols": +#set cols = (str($output.std_cols)+","+str($output.ext_cols)).replace("None", " ").replace(",,", ",").replace(",", " ") +-c "$cols" +#else +-c "$output.out_format" +#end if +#for i in $blastxml_file#${i} #end for# </command> <stdio> <!-- Anything other than zero is an error --> @@ -10,14 +17,50 @@ <exit_code range=":-1" /> </stdio> <inputs> - <param name="blastxml_file" type="data" format="blastxml" label="BLAST results as XML"/> - <param name="out_format" type="select" label="Output format"> - <option value="std">Tabular (standard 12 columns)</option> - <option value="ext" selected="True">Tabular (extended 24 columns)</option> - </param> + <param name="blastxml_file" type="data" format="blastxml" multiple="true" label="BLAST results as XML"/> + <conditional name="output"> + <param name="out_format" type="select" label="Output format"> + <option value="std" selected="True">Tabular (standard 12 columns)</option> + <option value="ext">Tabular (extended 25 columns)</option> + <option value="cols">Tabular (select columns to output)</option> + </param> + <when value="std"/> + <when value="ext"/> + <when value="cols"> + <param name="std_cols" type="select" multiple="true" display="checkboxes" label="Standard columns"> + <option selected="true" value="qseqid">qseqid = Query Seq-id (ID of your sequence)</option> + 
<option selected="true" value="sseqid">sseqid = Subject Seq-id (ID of the database hit)</option> + <option selected="true" value="pident">pident = Percentage of identical matches</option> + <option selected="true" value="length">length = Alignment length</option> + <option selected="true" value="mismatch">mismatch = Number of mismatches</option> + <option selected="true" value="gapopen">gapopen = Number of gap openings</option> + <option selected="true" value="qstart">qstart = Start of alignment in query</option> + <option selected="true" value="qend">qend = End of alignment in query</option> + <option selected="true" value="sstart">sstart = Start of alignment in subject (database hit)</option> + <option selected="true" value="send">send = End of alignment in subject (database hit)</option> + <option selected="true" value="evalue">evalue = Expectation value (E-value)</option> + <option selected="true" value="bitscore">bitscore = Bit score</option> + </param> + <param name="ext_cols" type="select" multiple="true" display="checkboxes" label="Extended columns"> + <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a ';'</option> + <option value="score">score = Raw score</option> + <option value="nident">nident = Number of identical matches</option> + <option value="positive">positive = Number of positive-scoring matches</option> + <option value="gaps">gaps = Total number of gaps</option> + <option value="ppos">ppos = Percentage of positive-scoring matches</option> + <option value="qframe">qframe = Query frame</option> + <option value="sframe">sframe = Subject frame</option> + <option value="qseq">qseq = Aligned part of query sequence</option> + <option value="sseq">sseq = Aligned part of subject sequence</option> + <option value="qlen">qlen = Query sequence length</option> + <option value="slen">slen = Subject sequence length</option> + <option value="salltitles">salltitles = All subject title(s), separated by a '<>'</option> + </param> + </when> 
+ </conditional> </inputs> <outputs> - <data name="tabular_file" format="tabular" label="$blastxml_file.display_name (as tabular)" /> + <data name="tabular_file" format="tabular" label="$on_string (as tabular)" /> </outputs> <requirements> </requirements> @@ -80,6 +123,19 @@ <param name="out_format" value="ext" /> <output name="tabular_file" file="blastn_arabidopsis.extended.tabular" ftype="tabular" /> </test> + <!-- there are some harmless white space differences in our conversion to the BLAST+ output here: --> + <test> + <param name="blastxml_file" value="blastn_rhodopsin_vs_three_human.xml" ftype="blastxml" /> + <param name="out_format" value="std" /> + <output name="tabular_file" file="blastn_rhodopsin_vs_three_human_converted.tabular" ftype="tabular" /> + </test> + <test> + <param name="blastxml_file" value="blastn_rhodopsin_vs_three_human.xml" ftype="blastxml" /> + <param name="out_format" value="cols" /> + <param name="std_cols" value="qseqid,sseqid,pident" /> + <param name="ext_cols" value="qlen,slen" /> + <output name="tabular_file" file="blastn_rhodopsin_vs_three_human.columns.tabular" ftype="tabular" /> + </test> </tests> <help> @@ -120,7 +176,7 @@ ====== ============= =========================================== Column NCBI name Description ------ ------------- ------------------------------------------- - 13 sallseqid All subject Seq-id(s), separated by ';' + 13 sallseqid All subject Seq-id(s), separated by a ';' 14 score Raw score 15 nident Number of identical matches 16 positive Number of positive-scoring matches @@ -132,7 +188,7 @@ 22 sseq Aligned part of subject sequence 23 qlen Query sequence length 24 slen Subject sequence length - 25 salltitles All subject title(s), separated by '<>' + 25 salltitles All subject title(s), separated by a '<>' ====== ============= =========================================== Beware that the XML file (and thus the conversion) and the tabular output
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Fri Mar 14 07:40:46 2014 -0400 @@ -1,4 +1,4 @@ -<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.22"> +<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.1.00"> <description>Show BLAST database information from blastdbcmd</description> <macros> <token name="@BINARY@">blastdbcmd</token>
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Fri Mar 14 07:40:46 2014 -0400 @@ -1,4 +1,4 @@ -<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.22"> +<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.1.00"> <description>Extract sequence(s) from BLAST database</description> <macros> <token name="@BINARY@">blastdbcmd</token>
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Fri Mar 14 07:40:46 2014 -0400 @@ -1,4 +1,4 @@ -<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.22"> +<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.1.00"> <description>Search nucleotide database with nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism> @@ -36,14 +36,16 @@ <expand macro="input_conditional_nucleotide_db" /> <param name="blast_type" type="select" display="radio" label="Type of BLAST"> - <option value="megablast">megablast</option> - <option value="blastn">blastn</option> - <option value="blastn-short">blastn-short</option> - <option value="dc-megablast">dc-megablast</option> + <option value="megablast">megablast - Traditional megablast used to find very similar (e.g., intraspecies or closely related species) sequences</option> + <option value="blastn">blastn - Traditional BLASTN requiring an exact match of 11, for somewhat similar sequences</option> + <option value="blastn-short">blastn-short - BLASTN program optimized for sequences shorter than 50 bases</option> + <option value="dc-megablast">dc-megablast - Discontiguous megablast used to find more distant (e.g., interspecies) sequences</option> <!-- Using BLAST 2.2.24+ this gives an error: BLAST engine error: Program type 'vecscreen' not supported <option value="vecscreen">vecscreen</option> + In any case, vecscreen has gone in BLAST+ 2.2.28 --> + <!-- BLAST+ 2.2.28 also offers rmblastn --> </param> <expand macro="input_evalue" /> <expand macro="input_out_format" /> @@ -63,7 +65,7 @@ </expand> </inputs> <outputs> - <data name="output1" format="tabular" label="${blast_type.value_label} $query.name vs @ON_DB_SUBJECT@"> + <data 
name="output1" format="tabular" label="${blast_type.value} $query.name vs @ON_DB_SUBJECT@"> <expand macro="output_change_format" /> </data> </outputs> @@ -74,10 +76,32 @@ <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" /> <param name="database" value="" /> <param name="evalue_cutoff" value="1e-40" /> + <param name="out_format" value="5" /> + <param name="adv_opts_selector" value="basic" /> + <output name="output1" file="blastn_rhodopsin_vs_three_human.xml" ftype="blastxml" /> + </test> + <test> + <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-40" /> <param name="out_format" value="6" /> <param name="adv_opts_selector" value="basic" /> <output name="output1" file="blastn_rhodopsin_vs_three_human.tabular" ftype="tabular" /> </test> + <test> + <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-40" /> + <param name="out_format" value="cols" /> + <param name="std_cols" value="qseqid,sseqid,pident" /> + <param name="ext_cols" value="qlen,slen" /> + <param name="adv_opts_selector" value="basic" /> + <output name="output1" file="blastn_rhodopsin_vs_three_human.columns.tabular" ftype="tabular" /> + </test> </tests> <help>
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Fri Mar 14 07:40:46 2014 -0400 @@ -1,4 +1,4 @@ -<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.22"> +<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.1.00"> <description>Search protein database with protein query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" /> @@ -34,8 +34,8 @@ <expand macro="input_conditional_protein_db" /> <param name="blast_type" type="select" display="radio" label="Type of BLAST"> - <option value="blastp">blastp</option> - <option value="blastp-short">blastp-short</option> + <option value="blastp">blastp - Traditional BLASTP to compare a protein query to a protein database</option> + <option value="blastp-short">blastp-short - BLASTP optimized for queries shorter than 30 residues</option> </param> <expand macro="input_evalue" /> <expand macro="input_out_format" /> @@ -55,7 +55,7 @@ </expand> </inputs> <outputs> - <data name="output1" format="tabular" label="${blast_type.value_label} $query.name vs @ON_DB_SUBJECT@"> + <data name="output1" format="tabular" label="${blast_type.value} $query.name vs @ON_DB_SUBJECT@"> <expand macro="output_change_format" /> </data> </outputs>
--- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Fri Mar 14 07:40:46 2014 -0400 @@ -1,4 +1,4 @@ -<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.22"> +<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.1.00"> <description>Search protein database with translated nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism> @@ -83,6 +83,21 @@ <param name="adv_opts_selector" value="basic" /> <output name="output1" file="blastx_rhodopsin_vs_four_human_ext.tabular" ftype="tabular" /> </test> + <test> + <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" /> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="database" value="" /> + <param name="evalue_cutoff" value="1e-10" /> + <param name="out_format" value="cols" /> + <param name="std_cols" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore" /> + <param name="ext_cols" value="sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles" /> + <param name="ids_cols" value="qgi,qacc,qaccver,sallseqid,sgi,sallgi,sacc,saccver,sallacc,stitle" /> + <param name="misc_cols" value="sstrand,frames,btop,qcovs,qcovhsp" /> + <param name="tax_cols" value="staxids,sscinames,scomnames,sblastnames,sskingdoms" /> + <param name="adv_opts_selector" value="basic" /> + <output name="output1" file="blastx_rhodopsin_vs_four_human_all.tabular" ftype="tabular" /> + </test> </tests> <help>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml Fri Mar 14 07:40:46 2014 -0400 @@ -0,0 +1,87 @@ +<tool id="ncbi_convert2blastmask_wrapper" name="NCBI BLAST+ convert2blastmask" version="0.1.00"> + <description>Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb</description> + <macros> + <token name="@BINARY@">convert2blastmask</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +convert2blastmask +-in $infile +-masking_algorithm "$masking_algorithm" +-masking_options "$masking_options" +$parse_seqids +-out "$outfile" +-outfmt $outformat + </command> + <expand macro="stdio" /> + <inputs> + <param name="infile" type="data" format="fasta" label="masked FASTA file"/> + <param name="masking_algorithm" type="select" label="Used masking algorithm"> + <option value="dust">DUST</option> + <option value="seg" selected="true">SEG</option> + <option value="windowmasker">windowmasker</option> + <option value="repeat">repeat</option> + <option value="other">other</option> + </param> + <param name="masking_options" type="text" value="" size="20" label="Masking algorithm options to create the masked input" + help ="free text to describe the options used to create the masking files. 
(-masking_options)"> + <sanitizer invalid_char=""> + <valid initial="string.printable" /> + </sanitizer> + </param> + <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="true" label="Parse Seq-ids in FASTA input" help="(-parse_seqids)" /> + <param name="outformat" type="select" label="Output format"> + <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option> + <option value="maskinfo_asn1_text" selected="True">maskinfo ASN.1 text</option> + <option value="maskinfo_xml">maskinfo_xml</option> + </param> + </inputs> + <outputs> + <data name="outfile" format="maskinfo-asn1" label="SEG Masked File"> + <change_format> + <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" /> + <!-- + <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" /> + --> + <when input="outformat" value="maskinfo_xml" format="xml" /> + </change_format> + </data> + </outputs> + <tests> + <test> + <param name="infile" value="four_human_proteins_masked.fasta" ftype="fasta" /> + <param name="masking_algorithm" value="seg" /> + <param name="masking_options" value="window=12; locut=2.2; hicut=2.5" /> + <param name="parse_seqids" value="True" /> + <param name="outformat" value="maskinfo_asn1_bin" /> + <output name="outfile" file="convert2blastmask_four_human_masked.maskinfo-asn1-binary" /> + </test> + <test> + <param name="infile" value="four_human_proteins_masked.fasta" ftype="fasta" /> + <param name="masking_algorithm" value="seg" /> + <param name="masking_options" value="window=12; locut=2.2; hicut=2.5" /> + <param name="parse_seqids" value="True" /> + <param name="outformat" value="maskinfo_asn1_text" /> + <output name="outfile" file="convert2blastmask_four_human_masked.maskinfo-asn1" /> + </test> + </tests> + <help> +**What it does** + +Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb. 
+ +More information about segmasker can be found in the `BLAST Command Line Applications User Manual`_. + +.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/ + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers (a more specific paper covering this wrapper is planned): + +@REFERENCES@ + </help> +</tool>
--- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Fri Mar 14 07:40:46 2014 -0400 @@ -1,4 +1,4 @@ -<tool id="ncbi_dustmasker_wrapper" name="NCBI BLAST+ dustmasker" version="0.0.22"> +<tool id="ncbi_dustmasker_wrapper" name="NCBI BLAST+ dustmasker" version="0.1.00"> <!-- dustmasker wrapper from Edward Kirton and Nicola Soranzo --> <description>masks low complexity regions</description> <macros> @@ -27,27 +27,24 @@ <param name="level" type="integer" value="20" label="DUST level" help="Score threshold for subwindows" /> <param name="linker" type="integer" value="1" label="DUST linker" help="How close masked intervals should be to get merged together" /> <param name="outformat" type="select" label="Output format"> -<!-- acclist and maskinfo_xml are listed as possible output formats in - "dustmasker -help", but were not recognized by NCBI BLAST up to - release 2.2.27+. Fixed in BLAST 2.2.28+. - seqloc_* formats are not very useful --> -<!-- <option value="acclist">acclist</option>--> + <!-- seqloc_* formats are not very useful + and what BLAST+ calls 'interval' is not what Galaxy calls interval format + --> <option value="fasta">FASTA</option> - <option value="interval" selected="true">interval</option> <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option> - <option value="maskinfo_asn1_text">maskinfo ASN.1 text</option> -<!-- <option value="maskinfo_xml">maskinfo_xml</option> - <option value="seqloc_asn1_bin">seqloc_asn1_bin</option> - <option value="seqloc_asn1_text">seqloc_asn1_text</option> - <option value="seqloc_xml">seqloc_xml</option>--> + <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option> + <option value="maskinfo_xml">maskinfo_xml</option> </param> </inputs> <outputs> - <data name="outfile" format="interval" label="DUST Masked File"> + <data name="outfile" format="maskinfo-asn1" label="DUST Masked File"> <change_format> <when 
input="outformat" value="fasta" format="fasta" /> <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" /> + <!-- <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" /> + --> + <when input="outformat" value="maskinfo_xml" format="xml" /> </change_format> </data> </outputs> @@ -83,13 +80,14 @@ <help> **What it does** -This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST algorithm. +This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST_ algorithm. If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool. More information about dustmasker can be found in the `BLAST Command Line Applications User Manual`_. .. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/ +.. _DUST: http://www.ncbi.nlm.nih.gov/pubmed/16796549 **References**
--- a/tools/ncbi_blast_plus/ncbi_macros.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_macros.xml Fri Mar 14 07:40:46 2014 -0400 @@ -1,30 +1,101 @@ <macros> <xml name="output_change_format"> <change_format> - <when input="out_format" value="0" format="txt"/> - <when input="out_format" value="0 -html" format="html"/> - <when input="out_format" value="2" format="txt"/> - <when input="out_format" value="2 -html" format="html"/> - <when input="out_format" value="4" format="txt"/> - <when input="out_format" value="4 -html" format="html"/> - <when input="out_format" value="5" format="blastxml"/> + <when input="output.out_format" value="0" format="txt"/> + <when input="output.out_format" value="0 -html" format="html"/> + <when input="output.out_format" value="2" format="txt"/> + <when input="output.out_format" value="2 -html" format="html"/> + <when input="output.out_format" value="4" format="txt"/> + <when input="output.out_format" value="4 -html" format="html"/> + <when input="output.out_format" value="5" format="blastxml"/> </change_format> </xml> <xml name="input_out_format"> - <param name="out_format" type="select" label="Output format"> - <option value="6">Tabular (standard 12 columns)</option> - <option value="ext" selected="True">Tabular (extended 25 columns)</option> - <option value="5">BLAST XML</option> - <option value="0">Pairwise text</option> - <option value="0 -html">Pairwise HTML</option> - <option value="2">Query-anchored text</option> - <option value="2 -html">Query-anchored HTML</option> - <option value="4">Flat query-anchored text</option> - <option value="4 -html">Flat query-anchored HTML</option> - <!-- - <option value="-outfmt 11">BLAST archive format (ASN.1)</option> - --> - </param> + <conditional name="output"> + <param name="out_format" type="select" label="Output format"> + <option value="6">Tabular (standard 12 columns)</option> + <option value="ext" selected="True">Tabular (extended 25 columns)</option> + <option 
value="cols">Tabular (select which columns)</option> + <option value="5">BLAST XML</option> + <option value="0">Pairwise text</option> + <option value="0 -html">Pairwise HTML</option> + <option value="2">Query-anchored text</option> + <option value="2 -html">Query-anchored HTML</option> + <option value="4">Flat query-anchored text</option> + <option value="4 -html">Flat query-anchored HTML</option> + <!-- + <option value="-outfmt 11">BLAST archive format (ASN.1)</option> + --> + </param> + <when value="6"/> + <when value="ext"/> + <when value="cols"> + <param name="std_cols" type="select" multiple="true" display="checkboxes" label="Standard columns"> + <option selected="true" value="qseqid">qseqid = Query Seq-id (ID of your sequence)</option> + <option selected="true" value="sseqid">sseqid = Subject Seq-id (ID of the database hit)</option> + <option selected="true" value="pident">pident = Percentage of identical matches</option> + <option selected="true" value="length">length = Alignment length</option> + <option selected="true" value="mismatch">mismatch = Number of mismatches</option> + <option selected="true" value="gapopen">gapopen = Number of gap openings</option> + <option selected="true" value="qstart">qstart = Start of alignment in query</option> + <option selected="true" value="qend">qend = End of alignment in query</option> + <option selected="true" value="sstart">sstart = Start of alignment in subject (database hit)</option> + <option selected="true" value="send">send = End of alignment in subject (database hit)</option> + <option selected="true" value="evalue">evalue = Expectation value (E-value)</option> + <option selected="true" value="bitscore">bitscore = Bit score</option> + </param> + <param name="ext_cols" type="select" multiple="true" display="checkboxes" label="Extended columns"> + <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a ';'</option> + <option value="score">score = Raw score</option> + <option 
value="nident">nident = Number of identical matches</option> + <option value="positive">positive = Number of positive-scoring matches</option> + <option value="gaps">gaps = Total number of gaps</option> + <option value="ppos">ppos = Percentage of positive-scoring matches</option> + <option value="qframe">qframe = Query frame</option> + <option value="sframe">sframe = Subject frame</option> + <option value="qseq">qseq = Aligned part of query sequence</option> + <option value="sseq">sseq = Aligned part of subject sequence</option> + <option value="qlen">qlen = Query sequence length</option> + <option value="slen">slen = Subject sequence length</option> + <option value="salltitles">salltitles = All subject title(s), separated by a '<>'</option> + </param> + <param name="ids_cols" type="select" multiple="true" display="checkboxes" label="Other identifier columns"> + <option value="qgi">qgi = Query GI</option> + <option value="qacc">qacc = Query accession</option> + <option value="qaccver">qaccver = Query accession.version</option> + <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a ';'</option> + <option value="sgi">sgi = Subject GI</option> + <option value="sallgi">sallgi = All subject GIs</option> + <option value="sacc">sacc = Subject accession</option> + <option value="saccver">saccver = Subject accession.version</option> + <option value="sallacc">sallacc = All subject accessions</option> + <option value="stitle">stitle = Subject Title</option> + </param> + <param name="misc_cols" type="select" multiple="true" display="checkboxes" label="Miscellaneous columns"> + <option value="sstrand">sstrand = Subject Strand</option> + <!-- Is it really worth including 'frames' given we have 'qframe' and 'sframe'? 
--> + <option value="frames">frames = Query and subject frames separated by a '/'</option> + <option value="btop">btop = Blast traceback operations (BTOP)</option> + <option value="qcovs">qcovs = Query Coverage Per Subject</option> + <option value="qcovhsp">qcovhsp = Query Coverage Per HSP</option> + </param> + <param name="tax_cols" type="select" multiple="true" display="checkboxes" label="Taxonomy columns"> + <option value="staxids">staxids = unique Subject Taxonomy ID(s), separated by a ';' (in numerical order)</option> + <!-- TODO, how to handle the taxonomy data file dependency? If missing these give N/A --> + <option value="sscinames">sscinames = unique Subject Scientific Name(s), separated by a ';'</option> + <option value="scomnames">scomnames = unique Subject Common Name(s), separated by a ';'</option> + <option value="sblastnames">sblastnames = unique Subject Blast Name(s), separated by a ';' (in alphabetical order)</option> + <option value="sskingdoms">sskingdoms = unique Subject Super Kingdom(s), separated by a ';' (in alphabetical order)</option> + </param> + </when> + <when value="5"/> + <when value="0"/> + <when value="0 -html"/> + <when value="2"/> + <when value="2 -html"/> + <when value="4"/> + <when value="4 -html"/> + </conditional> </xml> <xml name="input_scoring_matrix"> <param name="matrix" type="select" label="Scoring matrix"> @@ -240,7 +311,7 @@ <xml name="requirements"> <requirements> <requirement type="binary">@BINARY@</requirement> - <requirement type="package" version="2.2.28">blast+</requirement> + <requirement type="package" version="2.2.29">blast+</requirement> </requirements> <version_command>@BINARY@ -version</version_command> </xml> @@ -268,10 +339,15 @@ </token> <token name="@BLAST_OUTPUT@">-out "$output1" ##Set the extended list here so when we add things, saved workflows are not affected -#if str($out_format)=="ext": +#if str($output.out_format)=="ext": -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq 
sseq qlen slen salltitles" +#elif str($output.out_format)=="cols" +##Pick your own columns. Galaxy gives us it comma separated, BLAST+ wants space separated: +##TODO - Can we catch the user picking no columns and raise an error here? +#set cols = (str($output.std_cols)+","+str($output.ext_cols)+","+str($output.ids_cols)+","+str($output.misc_cols)+","+str($output.tax_cols)).replace("None", "").replace(",,", ",").replace(",", " ").strip() + -outfmt "6 $cols" #else: - -outfmt $out_format + -outfmt $output.out_format #end if </token> <token name="@ADVANCED_OPTIONS@">$adv_opts.filter_query @@ -330,7 +406,7 @@ ====== ========= ============================================ The BLAST+ tools can optionally output additional columns of information, -but this takes longer to calculate. Most (but not all) of these columns are +but this takes longer to calculate. Many commonly used extra columns are included by selecting the extended tabular output. The extra columns are included *after* the standard 12 columns. 
This is so that you can write workflow filtering steps that accept either the 12 or 25 column tabular @@ -339,7 +415,7 @@ ====== ============= =========================================== Column NCBI name Description ------ ------------- ------------------------------------------- - 13 sallseqid All subject Seq-id(s), separated by ';' + 13 sallseqid All subject Seq-id(s), separated by a ';' 14 score Raw score 15 nident Number of identical matches 16 positive Number of positive-scoring matches @@ -351,10 +427,14 @@ 22 sseq Aligned part of subject sequence 23 qlen Query sequence length 24 slen Subject sequence length - 25 salltitles All subject title(s), separated by '<>' + 25 salltitles All subject title(s), separated by a '<>' ====== ============= =========================================== -The third option is BLAST XML output, which is designed to be parsed by +The third option is to customise the tabular output by selecting which +columns you want, from the standard set of 12, the default set of 25, +or any of the additional columns BLAST+ offers (including species name). + +The fourth option is BLAST XML output, which is designed to be parsed by another program, and is understood by some Galaxy tools. You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Fri Mar 14 07:40:46 2014 -0400 @@ -1,4 +1,4 @@ -<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.22"> +<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.1.00"> <description>Make BLAST database</description> <macros> <token name="@BINARY@">makeblastdb</token> @@ -8,50 +8,44 @@ <command interpreter="python">check_no_duplicates.py ##First check for duplicates (since BLAST+ 2.2.28 fails to do so) ##and abort (via the ampersand ampersand trick) if any are found. -#for $i in $in -"${i.file}" -#end for +#for i in $input_file#"${i}" #end for# && makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}" $parse_seqids $hash_index ## Single call to -in with multiple filenames space separated with outer quotes ## (presumably any filenames with spaces would be a problem). Note this gives -## some extra spaces, e.g. -in " file1 file2 file3 " but BLAST seems happy: --in " -#for $i in $in -${i.file} -#end for -" +## some extra spaces, e.g. 
-in "file1 file2 file3 " but BLAST seems happy: +-in "#for i in $input_file#${i} #end for#" #if $title: -title "$title" #else: ##Would default to being based on the cryptic Galaxy filenames, which is unhelpful -title "BLAST Database" #end if --dbtype $dbtype -#set $mask_string = '' -#set $sep = '-mask_data ' -#for $i in $mask_data -#set $mask_string += $sep + str($i.file) -#set $sep = ',' +-dbtype $dbtype +## -------------------------------------------------------------------- +## Masking +## -------------------------------------------------------------------- +## HACK: If no mask files, evaluates as a list with just None in it: +## See Trello issue https://trello.com/c/lp5YmA1O +#if ' '.join( map(str, $mask_data_file) ) != 'None': +#for i in $mask_data_file: +-mask_data "${i}" #end for -$mask_string -## #set $gi_mask_string = '' -## #set $sep = '-gi_mask -gi_mask_name ' -## #for $i in $gi_mask -## #set $gi_mask_string += $sep + str($i.file) -## #set $sep = ',' -## #end for -## $gi_mask_string -## #if $tax.select == 'id': -## -taxid $tax.id -## #else if $tax.select == 'map': -## -taxid_map $tax.map -## #end if +#end if +## -------------------------------------------------------------------- +## Taxonomy +## -------------------------------------------------------------------- +#if $tax.taxselect == 'id': +-taxid $tax.taxid +## TODO - Can we use a tabular file for the taxonomy mapping? +## #else if $tax.taxselect == 'map': +## -taxid_map $tax.taxmap +#end if ## -------------------------------------------------------------------- ## Capture the stdout log information to the primary file (plain text): ->> "$outfile" +> "$outfile" </command> <expand macro="stdio" /> <inputs> @@ -59,47 +53,38 @@ <option value="prot">protein</option> <option value="nucl">nucleotide</option> </param> - <!-- TODO Allow merging of existing BLAST databases (conditional on the database type) + <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)? 
NOTE Double check the new database would be self contained first - <repeat name="in" title="BLAST or FASTA Database" min="1"> - <param name="file" type="data" format="fasta,blastdbn,blastdbp" label="BLAST or FASTA database" /> - </repeat> --> - <!-- TODO Switch this to using <param ... multiple="true" /> instead of <repeat> block? --> - <repeat name="in" title="FASTA file" min="1"> - <param name="file" type="data" format="fasta" /> - </repeat> + <!-- Note this is a mandatory parameter - default should be most recent FASTA file --> + <param name="input_file" type="data" multiple="true" optional="false" format="fasta" label="Input FASTA file(s)" help="One or more FASTA files" /> <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" /> <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" /> <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." /> <!-- SEQUENCE MASKING OPTIONS --> - <repeat name="mask_data" title="Masking data file"> - <param name="mask_data_file" type="data" format="maskinfo-asn1,maskinfo-asn1-binary" label="ASN.1 file containing masking data" help="As produced by NCBI masking applications (e.g. 
dustmasker, segmasker, windowmasker)" /> - </repeat> - <!-- TODO - <repeat name="gi_mask" title="Create GI indexed masking data"> - <param name="gi_mask_file" type="data" format="asnb" label="Masking data output file" /> - </repeat> - --> - + <!-- Note this is an optional parameter - default should be NO files --> + <param name="mask_data_file" type="data" multiple="true" optional="true" value="" format="maskinfo-asn1,maskinfo-asn1-binary" label="Optional ASN.1 file(s) containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" /> + <!-- TODO - Option to create GI indexed masking data? via -gi_mask and -gi_mask_name? --> <!-- TAXONOMY OPTIONS --> - <!-- TODO <conditional name="tax"> - <param name="select" type="select" label="Taxonomy options"> - <option value="">Do not assign sequences to Taxonomy IDs</option> - <option value="id">Assign all sequences to one Taxonomy ID</option> + <param name="taxselect" type="select" label="Taxonomy options"> + <option value="">Do not assign a Taxonomy ID to the sequences</option> + <option value="id">Assign the same Taxonomy ID to all the sequences</option> + <!-- <option value="map">Supply text file mapping sequence IDs to taxonomy IDs</option> + --> </param> <when value=""> </when> <when value="id"> - <param name="id" type="integer" value="" label="NCBI taxonomy ID" help="Integer >=0" /> + <param name="taxid" type="integer" value="" label="NCBI taxonomy ID" help="Integer >=0, e.g. 9606 for Homo sapiens" min="0" /> </when> + <!-- TODO: File format? <when value="map"> - <param name="file" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" /> + <param name="taxmap" type="data" format="txt" label="Seq ID : Tax ID mapping file" help="Format: SequenceId TaxonomyId" /> </when> + --> </conditional> - --> </inputs> <outputs> <!-- If we only accepted one FASTA file, we could use its human name here... 
--> @@ -112,14 +97,54 @@ </outputs> <tests> <!-- Note the (two line) PIN file is not reproducible run to run. + Likewise there is a datestamp in the log file as well, so use contains comparison + With and without the masking makes no difference. + With and without the taxid the only real difference is in the *.phr file. --> <test> <param name="dbtype" value="prot" /> - <param name="file" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="title" value="Just 4 human proteins" /> + <param name="parse_seqids" value="" /> + <param name="hash_index" value="true" /> + <output name="out_file" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp"> + <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" /> + <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" /> + <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" /> + <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" /> + <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" /> + <extra_files type="file" value="four_human_proteins.fasta.phi" name="blastdb.phi" /> + <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" /> + <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" /> + </output> + </test> + <test> + <param name="dbtype" value="prot" /> + <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" /> <param name="title" value="Just 4 human proteins" /> <param name="parse_seqids" value="" /> <param name="hash_index" value="true" /> - <output name="out_file" file="four_human_proteins.fasta.log" ftype="blastdbp" lines_diff="6"> + <param name="taxselect" value="id" /> + <param name="taxid" value="9606" /> + <output name="out_file" compare="contains" 
file="four_human_proteins_taxid.fasta.log.txt" ftype="blastdbp"> + <extra_files type="file" value="four_human_proteins_taxid.fasta.phr" name="blastdb.phr" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.pin" name="blastdb.pin" lines_diff="2" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.psq" name="blastdb.psq" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.pog" name="blastdb.pog" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.phd" name="blastdb.phd" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.phi" name="blastdb.phi" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.psd" name="blastdb.psd" /> + <extra_files type="file" value="four_human_proteins_taxid.fasta.psi" name="blastdb.psi" /> + </output> + </test> + <test> + <param name="dbtype" value="prot" /> + <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="title" value="Just 4 human proteins" /> + <param name="parse_seqids" value="" /> + <param name="hash_index" value="true" /> + <param name="mask_data_file" value="segmasker_four_human.maskinfo-asn1" ftype="maskinfo-asn1" /> + <output name="out_file" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp"> <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" /> <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" /> <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />
--- a/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Fri Mar 14 07:40:46 2014 -0400 @@ -1,4 +1,4 @@ -<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.0.22"> +<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.1.00"> <description>Search protein domain database (PSSMs) with protein query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />
--- a/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Fri Mar 14 07:40:46 2014 -0400 @@ -1,4 +1,4 @@ -<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.0.22"> +<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.1.00"> <description>Search protein domain database (PSSMs) with translated nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Fri Mar 14 07:40:46 2014 -0400 @@ -0,0 +1,101 @@ +<tool id="ncbi_segmasker_wrapper" name="NCBI BLAST+ segmasker" version="0.1.00"> + <description>low-complexity regions in protein sequences</description> + <macros> + <token name="@BINARY@">segmasker</token> + <import>ncbi_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> +## The command is a Cheetah template which allows some Python based syntax. +## Lines starting hash hash are comments. Galaxy will turn newlines into spaces +segmasker +#if $db_opts.db_opts_selector == "db": + -in "${db_opts.database.fields.path}" -infmt blastdb +#elif $db_opts.db_opts_selector == "histdb": + -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb +#else: + -in "$subject" -infmt fasta +#end if +-out "$outfile" +-window $window +-locut $locut +-hicut $hicut +-outfmt $outformat + </command> + <expand macro="stdio" /> + <inputs> + <expand macro="input_conditional_protein_db" /> + <param name="window" type="integer" value="12" label="SEG window length" help="(-window)" /> + <param name="locut" type="float" value="2.2" label="SEG low cutoff" help="(-locut)" /> + <param name="hicut" type="float" value="2.5" label="SEG high cutoff" help="(-hicut)" /> + <param name="outformat" type="select" label="Output format"> + <!-- seqloc_* formats are not very useful + and what BLAST+ calls 'interval' is not what Galaxy calls interval format + --> + <option value="fasta">FASTA</option> + <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option> + <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option> + <option value="maskinfo_xml">maskinfo_xml</option> + </param> + </inputs> + <outputs> + <data name="outfile" format="maskinfo-asn1" label="SEG Masked File"> + <change_format> + <when input="outformat" value="fasta" format="fasta" /> + <when input="outformat" 
value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" /> + <!-- + <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" /> + --> + <when input="outformat" value="maskinfo_xml" format="xml" /> + </change_format> + </data> + </outputs> + <tests> + <test> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="window" value="12" /> + <param name="locut" value="2.2" /> + <param name="hicut" value="2.5" /> + <param name="outformat" value="fasta" /> + <output name="outfile" file="segmasker_four_human.fasta" /> + </test> + <test> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="window" value="12" /> + <param name="locut" value="2.2" /> + <param name="hicut" value="2.5" /> + <param name="outformat" value="maskinfo_asn1_bin" /> + <output name="outfile" file="segmasker_four_human.maskinfo-asn1-binary" /> + </test> + <test> + <param name="db_opts_selector" value="file" /> + <param name="subject" value="four_human_proteins.fasta" ftype="fasta" /> + <param name="window" value="12" /> + <param name="locut" value="2.2" /> + <param name="hicut" value="2.5" /> + <param name="outformat" value="maskinfo_asn1_text" /> + <output name="outfile" file="segmasker_four_human.maskinfo-asn1" /> + </test> + </tests> + <help> +**What it does** + +This tool identifies and masks out low complexity regions of a protein database (or proteins in FASTA format) by using the SEG_ algorithm. + +If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool. + +More information about segmasker can be found in the `BLAST Command Line Applications User Manual`_. + +.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/ +.. 
_SEG: http://www.ncbi.nlm.nih.gov/pubmed/8743706 + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers (a more specific paper covering this wrapper is planned): + +@REFERENCES@ + </help> +</tool>
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Fri Mar 14 07:40:46 2014 -0400 @@ -1,4 +1,4 @@ -<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.22"> +<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.1.00"> <description>Search translated nucleotide database with protein query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
--- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Fri Mar 14 07:40:46 2014 -0400 @@ -1,4 +1,4 @@ -<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.22"> +<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.1.00"> <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
--- a/tools/ncbi_blast_plus/tool_dependencies.xml Tue Jan 21 13:37:01 2014 -0500 +++ b/tools/ncbi_blast_plus/tool_dependencies.xml Fri Mar 14 07:40:46 2014 -0400 @@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> - <package name="blast+" version="2.2.28"> - <repository changeset_revision="23b9ba41ad00" name="package_blast_plus_2_2_28" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> + <package name="blast+" version="2.2.29"> + <repository changeset_revision="a2ec897aac2c" name="package_blast_plus_2_2_29" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency>