Repository 'ncbi_blast_plus'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus

Changeset 13:623f727cdff1 (2014-03-14)
Previous changeset 12:6560192c5098 (2014-01-21) Next changeset 14:2fe07f50a41e (2014-12-01)
Commit message:
Uploaded v0.1.00, uses BLAST+ 2.2.29, allows custom column selection for tabular output - including taxonomy fields.
modified:
test-data/blastn_rhodopsin_vs_three_human.tabular
test-data/blastp_four_human_vs_rhodopsin.tabular
test-data/blastp_four_human_vs_rhodopsin.xml
test-data/blastp_four_human_vs_rhodopsin_ext.tabular
test-data/blastp_rhodopsin_vs_four_human.tabular
test-data/blastx_rhodopsin_vs_four_human.tabular
test-data/blastx_rhodopsin_vs_four_human.xml
test-data/blastx_rhodopsin_vs_four_human_converted.tabular
test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular
test-data/blastx_rhodopsin_vs_four_human_ext.tabular
test-data/tblastn_four_human_vs_rhodopsin.html
test-data/tblastn_four_human_vs_rhodopsin.tabular
test-data/tblastn_four_human_vs_rhodopsin.xml
test-data/tblastn_four_human_vs_rhodopsin_ext.tabular
test-data/tblastx_rhodopsin_vs_three_human.tabular
tools/ncbi_blast_plus/README.rst
tools/ncbi_blast_plus/blastxml_to_tabular.py
tools/ncbi_blast_plus/blastxml_to_tabular.xml
tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml
tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
tools/ncbi_blast_plus/ncbi_macros.xml
tools/ncbi_blast_plus/ncbi_makeblastdb.xml
tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml
tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
tools/ncbi_blast_plus/tool_dependencies.xml
added:
test-data/blastn_rhodopsin_vs_three_human.columns.tabular
test-data/blastn_rhodopsin_vs_three_human.xml
test-data/blastn_rhodopsin_vs_three_human_converted.tabular
test-data/blastx_rhodopsin_vs_four_human_all.tabular
test-data/convert2blastmask_four_human_masked.maskinfo-asn1
test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary
test-data/four_human_proteins.fasta.log.txt
test-data/four_human_proteins_masked.fasta
test-data/four_human_proteins_taxid.fasta.log.txt
test-data/four_human_proteins_taxid.fasta.phd
test-data/four_human_proteins_taxid.fasta.phi
test-data/four_human_proteins_taxid.fasta.phr
test-data/four_human_proteins_taxid.fasta.pin
test-data/four_human_proteins_taxid.fasta.pog
test-data/four_human_proteins_taxid.fasta.psd
test-data/four_human_proteins_taxid.fasta.psi
test-data/four_human_proteins_taxid.fasta.psq
test-data/segmasker_four_human.fasta
test-data/segmasker_four_human.maskinfo-asn1
test-data/segmasker_four_human.maskinfo-asn1-binary
tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml
tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml
removed:
test-data/four_human_proteins.fasta.log
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastn_rhodopsin_vs_three_human.columns.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_rhodopsin_vs_three_human.columns.tabular Fri Mar 14 07:40:46 2014 -0400
b
@@ -0,0 +1,7 @@
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 1213
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 4301 1213
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 4301 1213
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 4301 1213
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 4301 1213
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 983 1213
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1047 1213
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastn_rhodopsin_vs_three_human.tabular
--- a/test-data/blastn_rhodopsin_vs_three_human.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/blastn_rhodopsin_vs_three_human.tabular Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,7 +1,7 @@
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 9e-133  460
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 7e-94  331
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 8e-74  265
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 8e-69  248
-gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323
-gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0  1474
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 4e-132   460
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 3e-93   331
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 3e-73   265
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 3e-68   248
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0  1323
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0  1208
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastn_rhodopsin_vs_three_human.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_rhodopsin_vs_three_human.xml Fri Mar 14 07:40:46 2014 -0400
b
b'@@ -0,0 +1,549 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+  <BlastOutput_program>blastn</BlastOutput_program>\n+  <BlastOutput_version>BLASTN 2.2.29+</BlastOutput_version>\n+  <BlastOutput_reference>Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), &quot;A greedy algorithm for aligning DNA sequences&quot;, J Comput Biol 2000; 7(1-2):203-14.</BlastOutput_reference>\n+  <BlastOutput_db></BlastOutput_db>\n+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+  <BlastOutput_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</BlastOutput_query-def>\n+  <BlastOutput_query-len>1047</BlastOutput_query-len>\n+  <BlastOutput_param>\n+    <Parameters>\n+      <Parameters_expect>1e-40</Parameters_expect>\n+      <Parameters_sc-match>1</Parameters_sc-match>\n+      <Parameters_sc-mismatch>-2</Parameters_sc-mismatch>\n+      <Parameters_gap-open>0</Parameters_gap-open>\n+      <Parameters_gap-extend>0</Parameters_gap-extend>\n+      <Parameters_filter>L;m;</Parameters_filter>\n+    </Parameters>\n+  </BlastOutput_param>\n+<BlastOutput_iterations>\n+<Iteration>\n+  <Iteration_iter-num>1</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+  <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>15</Statistics_hsp-len>\n+      <Statistics_eff-space>4933992</Statistics_eff-space>\n+      <Statistics_kappa>0.46</Statistics_kappa>\n+      <Statistics_lambda>1.28</Statistics_lambda>\n+      <Statistics_entropy>0.85</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  
<Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>2</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+  <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>15</Statistics_hsp-len>\n+      <Statistics_eff-space>4933992</Statistics_eff-space>\n+      <Statistics_kappa>0.46</Statistics_kappa>\n+      <Statistics_lambda>1.28</Statistics_lambda>\n+      <Statistics_entropy>0.85</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>3</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n+  <Iteration_query-len>1047</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+  <Hit_num>1</Hit_num>\n+  <Hit_id>Subject_3</Hit_id>\n+  <Hit_def>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds</Hit_def>\n+  <Hit_accession>Subject_3</Hit_accession>\n+  <Hit_len>1213</Hit_len>\n+  <Hit_hsps>\n+    <Hsp>\n+      <Hsp_num>1</Hsp_num>\n+      <Hsp_bit-score>1474.75</Hsp_bit-score>\n+      <Hsp_score>798</Hsp_score>\n+      <Hsp_evalue>0</Hsp_evalue>\n+      <Hsp_query-from>1</Hsp_query-from>\n+      <Hsp_query-to>1047</Hsp_query-to>\n+      <Hsp_hit-from>88</Hsp_hit-from>\n+      <Hsp_hit-to>1134</Hsp_hit-to>\n+      <Hsp_query-frame>1</Hsp_query-frame>\n+      <Hsp_hit-frame>1</Hsp_hit-frame>\n+      <Hsp_identity>964</Hsp_identity>\n+      <Hsp_positive>964</Hsp_positive>\n+      
<Hsp_gaps>0</Hsp_gaps>\n+      <Hsp_align-len>1047</Hsp_align-len>\n+      <Hsp_qseq>ATGAACGGGACGGAGGGCCCGAACTTCTACGTGCCC'..b'|||||||| |||||||||||||||||| | || || ||||||||||||||||||||||| |||||||| || |||||||||||  | || |||||||||| |||||| || ||||||||||| || |||||||| ||||| || || || |||||  | |||||||||||||||||| | |||||||||||||| ||||||||||||||||| || ||||||||||||||||| |||||||| ||||||||||||||||||||||||||||||||||| |||||||||||||| || ||||||||||||||||| |||||||| || || ||||| |||| ||||||||| || |||||||| |||||  ||||||||||||| || ||||| |||||||||| | |   ||||  |||||| ||||| || ||||||||||||||||| || ||||||| ||||||| ||||| |||| || |||||||| |||||||| |||||||||||||||||||| || ||||||||||||||||| |||||||| |||||||| |||||||||||||| || ||||||||||||||||||||||||||||| || |||||| ||||||||||  | ||||| ||||||||||||||||| ||||| ||| |||| || |||||||||||||||||||| || ||||||||||||| || | ||| |||| ||||| |||||||| ||||||||||||||||||||||||||||||||| |||||||  ||||||| ||||||||||| || |||||||| ||||||||  | |||||||||||||| ||||| ||||| |||||||| ||||||</Hsp_midline>\n+    </Hsp>\n+  </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>15</Statistics_hsp-len>\n+      <Statistics_eff-space>4933992</Statistics_eff-space>\n+      <Statistics_kappa>0.46</Statistics_kappa>\n+      <Statistics_lambda>1.28</Statistics_lambda>\n+      <Statistics_entropy>0.85</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>16</Iteration_iter-num>\n+  <Iteration_query-ID>Query_6</Iteration_query-ID>\n+  <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+  <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      
<Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>15</Statistics_hsp-len>\n+      <Statistics_eff-space>6353949</Statistics_eff-space>\n+      <Statistics_kappa>0.46</Statistics_kappa>\n+      <Statistics_lambda>1.28</Statistics_lambda>\n+      <Statistics_entropy>0.85</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>17</Iteration_iter-num>\n+  <Iteration_query-ID>Query_6</Iteration_query-ID>\n+  <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+  <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>15</Statistics_hsp-len>\n+      <Statistics_eff-space>6353949</Statistics_eff-space>\n+      <Statistics_kappa>0.46</Statistics_kappa>\n+      <Statistics_lambda>1.28</Statistics_lambda>\n+      <Statistics_entropy>0.85</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>18</Iteration_iter-num>\n+  <Iteration_query-ID>Query_6</Iteration_query-ID>\n+  <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+  <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>15</Statistics_hsp-len>\n+      <Statistics_eff-space>6353949</Statistics_eff-space>\n+      <Statistics_kappa>0.46</Statistics_kappa>\n+      
<Statistics_lambda>1.28</Statistics_lambda>\n+      <Statistics_entropy>0.85</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n'
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastn_rhodopsin_vs_three_human_converted.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_rhodopsin_vs_three_human_converted.tabular Fri Mar 14 07:40:46 2014 -0400
b
@@ -0,0 +1,7 @@
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 4e-132 460
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 3e-93 331
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 3e-73 265
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 3e-68 248
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastp_four_human_vs_rhodopsin.tabular
--- a/test-data/blastp_four_human_vs_rhodopsin.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/blastp_four_human_vs_rhodopsin.tabular Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,6 +1,6 @@
-sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0  701
-sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0  619
-sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0  653
-sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0  631
-sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0  673
-sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0  599
+sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 96.55 348 12 0 1 348 1 348 0.0   701
+sp|P08100|OPSD_HUMAN gi|3024260|sp|P56514.1|OPSD_BUFBU 84.80 342 51 1 1 341 1 342 0.0   619
+sp|P08100|OPSD_HUMAN gi|283855846|gb|ADB45242.1| 94.82 328 17 0 11 338 1 328 0.0   653
+sp|P08100|OPSD_HUMAN gi|283855823|gb|ADB45229.1| 94.82 328 17 0 11 338 1 328 0.0   631
+sp|P08100|OPSD_HUMAN gi|223523|prf||0811197A 93.10 348 23 1 1 348 1 347 0.0   673
+sp|P08100|OPSD_HUMAN gi|12583665|dbj|BAB21486.1| 82.16 342 60 1 1 341 1 342 0.0   599
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastp_four_human_vs_rhodopsin.xml
--- a/test-data/blastp_four_human_vs_rhodopsin.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/blastp_four_human_vs_rhodopsin.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -2,7 +2,7 @@
 <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
 <BlastOutput>
   <BlastOutput_program>blastp</BlastOutput_program>
-  <BlastOutput_version>BLASTP 2.2.28+</BlastOutput_version>
+  <BlastOutput_version>BLASTP 2.2.29+</BlastOutput_version>
   <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
   <BlastOutput_db></BlastOutput_db>
   <BlastOutput_query-ID>sp|Q9BS26|ERP44_HUMAN</BlastOutput_query-ID>
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastp_four_human_vs_rhodopsin_ext.tabular
--- a/test-data/blastp_four_human_vs_rhodopsin_ext.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/blastp_four_human_vs_rhodopsin_ext.tabular Fri Mar 14 07:40:46 2014 -0400
b
b'@@ -1,6 +1,6 @@\n-sp|P08100|OPSD_HUMAN\tgi|57163783|ref|NP_001009242.1|\t96.55\t348\t12\t0\t1\t348\t1\t348\t0.0\t 701\tgi|57163783|ref|NP_001009242.1|\t1808\t336\t343\t0\t98.56\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\t348\t348\tN/A\n-sp|P08100|OPSD_HUMAN\tgi|3024260|sp|P56514.1|OPSD_BUFBU\t84.80\t342\t51\t1\t1\t341\t1\t342\t0.0\t 619\tgi|3024260|sp|P56514.1|OPSD_BUFBU\t1595\t290\t322\t1\t94.15\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE\t348\t354\tN/A\n-sp|P08100|OPSD_HUMAN\tgi|283855846|gb|ADB45242.1|\t94.82\t328\t17\t0\t11\t338\t1\t328\t0.0\t 
653\tgi|283855846|gb|ADB45242.1|\t1684\t311\t321\t0\t97.87\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\tN/A\n-sp|P08100|OPSD_HUMAN\tgi|283855823|gb|ADB45229.1|\t94.82\t328\t17\t0\t11\t338\t1\t328\t0.0\t 631\tgi|283855823|gb|ADB45229.1|\t1627\t311\t323\t0\t98.48\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\tN/A\n-sp|P08100|OPSD_HUMAN\tgi|223523|prf||0811197A\t93.10\t348\t23\t1\t1\t348\t1\t347\t0.0\t 
673\tgi|223523|prf||0811197A\t1736\t324\t336\t1\t96.55\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSR'..b'NGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE\t348\t354\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|283855846|gb|ADB45242.1|\t94.82\t328\t17\t0\t11\t338\t1\t328\t0.0\t  
653\tgi|283855846|gb|ADB45242.1|\t1684\t311\t321\t0\t97.87\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|283855823|gb|ADB45229.1|\t94.82\t328\t17\t0\t11\t338\t1\t328\t0.0\t  631\tgi|283855823|gb|ADB45229.1|\t1627\t311\t323\t0\t98.48\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|223523|prf||0811197A\t93.10\t348\t23\t1\t1\t348\t1\t347\t0.0\t  
673\tgi|223523|prf||0811197A\t1736\t324\t336\t1\t96.55\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\t348\t347\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|12583665|dbj|BAB21486.1|\t82.16\t342\t60\t1\t1\t341\t1\t342\t0.0\t  599\tgi|12583665|dbj|BAB21486.1|\t1544\t281\t314\t1\t91.81\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE\t348\t354\tN/A\n'
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastp_rhodopsin_vs_four_human.tabular
--- a/test-data/blastp_rhodopsin_vs_four_human.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/blastp_rhodopsin_vs_four_human.tabular Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,6 +1,6 @@
-gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 348 1 348 0.0  679
-gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 83.33 354 53 2 1 354 1 348 0.0  605
-gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 94.82 328 17 0 1 328 11 338 0.0  630
-gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 94.82 328 17 0 1 328 11 338 0.0  630
-gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 93.10 348 23 1 1 347 1 348 0.0  651
-gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 81.09 349 65 1 1 349 1 348 0.0  587
+gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 348 1 348 0.0   701
+gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 83.33 354 53 2 1 354 1 348 0.0   605
+gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 94.82 328 17 0 1 328 11 338 0.0   630
+gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 94.82 328 17 0 1 328 11 338 0.0   630
+gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 93.10 348 23 1 1 347 1 348 0.0   651
+gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 81.09 349 65 1 1 349 1 348 0.0   587
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastx_rhodopsin_vs_four_human.tabular
--- a/test-data/blastx_rhodopsin_vs_four_human.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/blastx_rhodopsin_vs_four_human.tabular Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,10 +1,10 @@
-gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0  639
-gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0  551
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67  220
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35  127
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33  121
+gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0   639
+gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0   551
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67   220
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35   127
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33   121
 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1
 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0
-gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0  589
-gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0  619
-gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0  532
+gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0   589
+gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0   619
+gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0   532
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastx_rhodopsin_vs_four_human.xml
--- a/test-data/blastx_rhodopsin_vs_four_human.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/blastx_rhodopsin_vs_four_human.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -2,7 +2,7 @@
 <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
 <BlastOutput>
   <BlastOutput_program>blastx</BlastOutput_program>
-  <BlastOutput_version>BLASTX 2.2.28+</BlastOutput_version>
+  <BlastOutput_version>BLASTX 2.2.29+</BlastOutput_version>
   <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
   <BlastOutput_db></BlastOutput_db>
   <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
@@ -307,9 +307,9 @@
   <Hit_hsps>
     <Hsp>
       <Hsp_num>1</Hsp_num>
-      <Hsp_bit-score>220.32</Hsp_bit-score>
-      <Hsp_score>560</Hsp_score>
-      <Hsp_evalue>4.29169e-67</Hsp_evalue>
+      <Hsp_bit-score>220.705</Hsp_bit-score>
+      <Hsp_score>561</Hsp_score>
+      <Hsp_evalue>3.21377e-67</Hsp_evalue>
       <Hsp_query-from>1</Hsp_query-from>
       <Hsp_query-to>333</Hsp_query-to>
       <Hsp_hit-from>11</Hsp_hit-from>
@@ -345,9 +345,9 @@
     </Hsp>
     <Hsp>
       <Hsp_num>3</Hsp_num>
-      <Hsp_bit-score>121.324</Hsp_bit-score>
-      <Hsp_score>303</Hsp_score>
-      <Hsp_evalue>1.96633e-33</Hsp_evalue>
+      <Hsp_bit-score>121.709</Hsp_bit-score>
+      <Hsp_score>304</Hsp_score>
+      <Hsp_evalue>1.62516e-33</Hsp_evalue>
       <Hsp_query-from>2855</Hsp_query-from>
       <Hsp_query-to>3031</Hsp_query-to>
       <Hsp_hit-from>177</Hsp_hit-from>
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastx_rhodopsin_vs_four_human_all.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastx_rhodopsin_vs_four_human_all.tabular Fri Mar 14 07:40:46 2014 -0400
b
@@ -0,0 +1,10 @@
+gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0   639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A 0 gi|57163782|ref|NM_001009242.1| gi|57163782|ref|NM_001009242.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 15KA71FL4TS79VA9ML34VI71LI7SASA21LI13TA1GV11 99 99 N/A N/A N/A N/A N/A
+gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0   551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 N/A 0 gi|2734705|gb|U59921.1|BBU59921 gi|2734705|gb|U59921.1|BBU59921 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 10IV1MF2KA20YF1IM1CA7LV7ML5IV17FV1NDHL4CG3VS1ML1ST1MLNH3IVLF1AP3YNVL36SG4VI7IV3SA2VA3LA9ML5VI26LM8RQ2CF30VIFA13FY2STNH3EN7VI7SASA9LM8IL2LI6FL1ED1DE 63 63 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67   220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 5KA9HY61FL4TS28 22 8 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35   127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 18LV3GS19LI7SASA13 22 5 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33   121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 2/0 6ML34VI14VARASA 22 4 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 3/0 1AG36LV1LF13VA4 22 4 N/A N/A N/A N/A N/A
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 N/A 0 gi|283855845|gb|GQ290303.1| gi|283855845|gb|GQ290303.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 9LI13TA1AV 22 2 N/A N/A N/A N/A N/A
+gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0   589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 N/A 0 gi|283855822|gb|GQ290312.1| gi|283855822|gb|GQ290312.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 5KA66ND4FL4TS5ML7AP49LV25ML34VI40VI6LV19VI11SASA21LI13TA1 99 99 N/A N/A N/A N/A N/A
+gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0   619 sp|P08100|OPSD_HUMAN 1596 325 337 0 96.84 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 N/A 0 gi|18148870|dbj|AB062417.1| gi|18148870|dbj|AB062417.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 1/0 15KA9AY22MV38FL4TS79VA9ML10PLHKEP1TV14IT2LM1VI47LV3GS11DN14TSSA1VI17VL2LI13TA13 99 99 N/A N/A N/A N/A N/A
+gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0   532 sp|P08100|OPSD_HUMAN 1371 272 307 0 92.19 2 0 MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA 1344 348 N/A 0 gi|12583664|dbj|AB043817.1| gi|12583664|dbj|AB043817.1| 0 0 sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN sp|P08100|OPSD_HUMAN N/A N/A 2/0 10IV1MF22AQ2AM1SA4FL2IVAL12IVEQ23FL4TS1ML3ML12IL14CS8WYMV5VMTS6SN6MAVF2TV6LA3FA20RLAK1GEIV10TVCV2ST2LMAIVI1SF4RQ2CF12EA3RK2RK6VI4SA2VI9WF8TN9SA5SA1LI3MV3CM6HN2IL2LI6FLEGEDEDDEGA 74 74 N/A N/A N/A N/A N/A
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastx_rhodopsin_vs_four_human_converted.tabular
--- a/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,6 +1,6 @@
 gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639
 gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220
 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127
 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121
 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular
--- a/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,8 +1,8 @@
 gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639 sp|P08100|OPSD_HUMAN 1647 336 343 0 98.56 1 0 MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 1047 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
 gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551 sp|P08100|OPSD_HUMAN 1419 283 315 0 94.88 3 0 MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE 1574 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220 sp|P08100|OPSD_HUMAN 560 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 3e-67 220 sp|P08100|OPSD_HUMAN 561 107 109 0 98.20 1 0 VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127 sp|P08100|OPSD_HUMAN 319 60 64 0 98.46 3 0 KEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ KEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 303 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121 sp|P08100|OPSD_HUMAN 304 54 57 0 96.61 2 0 RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS RYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1 sp|P08100|OPSD_HUMAN 240 55 56 0 94.92 3 0 LAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR LGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
 gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0 sp|P08100|OPSD_HUMAN 136 23 24 0 92.31 1 0 QFRNCMLTTLCCGKNPLGDDEASTTA QFRNCMLTTICCGKNPLGDDEASATV 4301 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
 gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589 sp|P08100|OPSD_HUMAN 1518 310 322 0 98.77 1 0 VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT 983 348 Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/blastx_rhodopsin_vs_four_human_ext.tabular
--- a/test-data/blastx_rhodopsin_vs_four_human_ext.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/blastx_rhodopsin_vs_four_human_ext.tabular Fri Mar 14 07:40:46 2014 -0400
b
b'@@ -1,10 +1,10 @@\n-gi|57163782|ref|NM_001009242.1|\tsp|P08100|OPSD_HUMAN\t96.55\t348\t12\t0\t1\t1044\t1\t348\t0.0\t 639\tsp|P08100|OPSD_HUMAN\t1647\t336\t343\t0\t98.56\t1\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\t1047\t348\tN/A\n-gi|2734705|gb|U59921.1|BBU59921\tsp|P08100|OPSD_HUMAN\t85.24\t332\t49\t0\t42\t1037\t1\t332\t0.0\t 551\tsp|P08100|OPSD_HUMAN\t1419\t283\t315\t0\t94.88\t3\t0\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE\t1574\t348\tN/A\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t96.40\t111\t4\t0\t1\t333\t11\t121\t4e-67\t 
220\tsp|P08100|OPSD_HUMAN\t560\t107\t109\t0\t98.20\t1\t0\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG\t4301\t348\tN/A\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t92.31\t65\t5\t0\t3174\t3368\t248\t312\t2e-35\t 127\tsp|P08100|OPSD_HUMAN\t319\t60\t64\t0\t98.46\t3\t0\tKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ\tKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ\t4301\t348\tN/A\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t91.53\t59\t5\t0\t2855\t3031\t177\t235\t2e-33\t 121\tsp|P08100|OPSD_HUMAN\t303\t54\t57\t0\t96.61\t2\t0\tRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS\tRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA\t4301\t348\tN/A\n+gi|57163782|ref|NM_001009242.1|\tsp|P08100|OPSD_HUMAN\t96.55\t348\t12\t0\t1\t1044\t1\t348\t0.0\t  639\tsp|P08100|OPSD_HUMAN\t1647\t336\t343\t0\t98.56\t1\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\t1047\t348\tN/A\n+gi|2734705|gb|U59921.1|BBU59921\tsp|P08100|OPSD_HUMAN\t85.24\t332\t49\t0\t42\t1037\t1\t332\t0.0\t  
551\tsp|P08100|OPSD_HUMAN\t1419\t283\t315\t0\t94.88\t3\t0\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILL'..b'0\t96.84\t1\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\t1047\t348\tN/A\n-gi|12583664|dbj|AB043817.1|\tsp|P08100|OPSD_HUMAN\t81.68\t333\t61\t0\t23\t1021\t1\t333\t0.0\t 
532\tsp|P08100|OPSD_HUMAN\t1371\t272\t307\t0\t92.19\t2\t0\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA\t1344\t348\tN/A\n+gi|283855822|gb|GQ290312.1|\tsp|P08100|OPSD_HUMAN\t95.09\t326\t16\t0\t1\t978\t11\t336\t0.0\t  589\tsp|P08100|OPSD_HUMAN\t1518\t310\t322\t0\t98.77\t1\t0\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT\t983\t348\tN/A\n+gi|18148870|dbj|AB062417.1|\tsp|P08100|OPSD_HUMAN\t93.39\t348\t23\t0\t1\t1044\t1\t348\t0.0\t  
619\tsp|P08100|OPSD_HUMAN\t1596\t325\t337\t0\t96.84\t1\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\t1047\t348\tN/A\n+gi|12583664|dbj|AB043817.1|\tsp|P08100|OPSD_HUMAN\t81.68\t333\t61\t0\t23\t1021\t1\t333\t0.0\t  532\tsp|P08100|OPSD_HUMAN\t1371\t272\t307\t0\t92.19\t2\t0\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA\t1344\t348\tN/A\n'
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/convert2blastmask_four_human_masked.maskinfo-asn1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/convert2blastmask_four_human_masked.maskinfo-asn1 Fri Mar 14 07:40:46 2014 -0400
b
@@ -0,0 +1,158 @@
+Blast-db-mask-info ::= {
+  algo-id 0,
+  algo-program seg,
+  algo-options "window=12; locut=2.2; hicut=2.5",
+  masks {
+    masks {
+      int {
+        from 6,
+        to 18,
+        id swissprot {
+          name "ERP44_HUMAN",
+          accession "Q9BS26",
+          release "reviewed"
+        }
+      },
+      packed-int {
+        {
+          from 11,
+          to 46,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 325,
+          to 332,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 421,
+          to 496,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 501,
+          to 516,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 536,
+          to 558,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 636,
+          to 648,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 737,
+          to 762,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 789,
+          to 806,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 970,
+          to 983,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        },
+        {
+          from 999,
+          to 1010,
+          id swissprot {
+            name "BMP2K_HUMAN",
+            accession "Q9NSY1",
+            release "reviewed"
+          }
+        }
+      },
+      packed-int {
+        {
+          from 3,
+          to 26,
+          id swissprot {
+            name "INSR_HUMAN",
+            accession "P06213",
+            release "reviewed"
+          }
+        },
+        {
+          from 372,
+          to 390,
+          id swissprot {
+            name "INSR_HUMAN",
+            accession "P06213",
+            release "reviewed"
+          }
+        },
+        {
+          from 766,
+          to 791,
+          id swissprot {
+            name "INSR_HUMAN",
+            accession "P06213",
+            release "reviewed"
+          }
+        },
+        {
+          from 1312,
+          to 1324,
+          id swissprot {
+            name "INSR_HUMAN",
+            accession "P06213",
+            release "reviewed"
+          }
+        }
+      },
+      int {
+        from 230,
+        to 246,
+        id swissprot {
+          name "OPSD_HUMAN",
+          accession "P08100",
+          release "reviewed"
+        }
+      }
+    },
+    more FALSE
+  }
+}
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary
b
Binary file test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary has changed
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins.fasta.log
--- a/test-data/four_human_proteins.fasta.log Tue Jan 21 13:37:01 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,10 +0,0 @@
-
-
-Building a new DB, current time: 11/21/2013 11:16:27
-New DB name:   /tmp/tmpnSjpCP/tmpwAbNo4/database/files/000/dataset_2_files/blastdb
-New DB title:  Just 4 human proteins
-Sequence type: Protein
-Keep Linkouts: T
-Keep MBits: T
-Maximum file size: 1000000000B
-Adding sequences from FASTA; added 4 sequences in 0.00202417 seconds.
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins.fasta.log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta.log.txt Fri Mar 14 07:40:46 2014 -0400
b
@@ -0,0 +1,5 @@
+New DB title:  Just 4 human proteins
+Sequence type: Protein
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_masked.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_masked.fasta Fri Mar 14 07:40:46 2014 -0400
b
@@ -0,0 +1,61 @@
+>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
+SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
+REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
+VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
+CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
+HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
+>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG
+GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
+DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
+LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
+DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT
+IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
+Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq
+qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy
+qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
+ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD
+KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE
+NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
+QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
+APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR
+HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
+WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
+SQQSQPVELDPFGAAPFPSKQ
+>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
+VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
+ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
+TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL
+EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
+NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
+DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
+RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf
+pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
+SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
+SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
+EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
+FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
+AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme
+fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
+PS
+>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
+VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
+GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
+EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes
+attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
+YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_taxid.fasta.log.txt Fri Mar 14 07:40:46 2014 -0400
b
@@ -0,0 +1,5 @@
+New DB title:  Just 4 human proteins
+Sequence type: Protein
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.phd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_taxid.fasta.phd Fri Mar 14 07:40:46 2014 -0400
b
@@ -0,0 +1,4 @@
+11117184492
+29249033410
+36665887501
+5392473183
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.phi
b
Binary file test-data/four_human_proteins_taxid.fasta.phi has changed
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.phr
b
Binary file test-data/four_human_proteins_taxid.fasta.phr has changed
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.pin
b
Binary file test-data/four_human_proteins_taxid.fasta.pin has changed
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.pog
b
Binary file test-data/four_human_proteins_taxid.fasta.pog has changed
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.psd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins_taxid.fasta.psd Fri Mar 14 07:40:46 2014 -0400
b
@@ -0,0 +1,4 @@
+gnl|bl_ord_id|00
+gnl|bl_ord_id|11
+gnl|bl_ord_id|22
+gnl|bl_ord_id|33
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.psi
b
Binary file test-data/four_human_proteins_taxid.fasta.psi has changed
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/four_human_proteins_taxid.fasta.psq
b
Binary file test-data/four_human_proteins_taxid.fasta.psq has changed
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/segmasker_four_human.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/segmasker_four_human.fasta Fri Mar 14 07:40:46 2014 -0400
b
@@ -0,0 +1,61 @@
+>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
+SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
+REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
+VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
+CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
+HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
+>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG
+GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
+DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
+LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
+DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT
+IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
+Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq
+qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy
+qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
+ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD
+KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE
+NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
+QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
+APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR
+HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
+WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
+SQQSQPVELDPFGAAPFPSKQ
+>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
+VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
+ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
+TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL
+EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
+NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
+DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
+RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf
+pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
+SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
+SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
+EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
+FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
+AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme
+fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
+PS
+>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
+VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
+GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
+EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes
+attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
+YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/segmasker_four_human.maskinfo-asn1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/segmasker_four_human.maskinfo-asn1 Fri Mar 14 07:40:46 2014 -0400
b
@@ -0,0 +1,114 @@
+Blast-db-mask-info ::= {
+  algo-id 1,
+  algo-program seg,
+  algo-options "window=12; locut=2.2; hicut=2.5",
+  masks {
+    masks {
+      int {
+        from 6,
+        to 18,
+        id local id 1
+      },
+      packed-int {
+        {
+          from 11,
+          to 46,
+          id local id 2
+        },
+        {
+          from 325,
+          to 332,
+          id local id 2
+        },
+        {
+          from 421,
+          to 443,
+          id local id 2
+        },
+        {
+          from 437,
+          to 450,
+          id local id 2
+        },
+        {
+          from 447,
+          to 496,
+          id local id 2
+        },
+        {
+          from 501,
+          to 516,
+          id local id 2
+        },
+        {
+          from 536,
+          to 554,
+          id local id 2
+        },
+        {
+          from 545,
+          to 558,
+          id local id 2
+        },
+        {
+          from 636,
+          to 648,
+          id local id 2
+        },
+        {
+          from 737,
+          to 762,
+          id local id 2
+        },
+        {
+          from 789,
+          to 806,
+          id local id 2
+        },
+        {
+          from 970,
+          to 983,
+          id local id 2
+        },
+        {
+          from 999,
+          to 1010,
+          id local id 2
+        }
+      },
+      packed-int {
+        {
+          from 3,
+          to 26,
+          id local id 3
+        },
+        {
+          from 372,
+          to 390,
+          id local id 3
+        },
+        {
+          from 766,
+          to 782,
+          id local id 3
+        },
+        {
+          from 780,
+          to 791,
+          id local id 3
+        },
+        {
+          from 1312,
+          to 1324,
+          id local id 3
+        }
+      },
+      int {
+        from 230,
+        to 246,
+        id local id 4
+      }
+    },
+    more FALSE
+  }
+}
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/segmasker_four_human.maskinfo-asn1-binary
b
Binary file test-data/segmasker_four_human.maskinfo-asn1-binary has changed
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/tblastn_four_human_vs_rhodopsin.html
--- a/test-data/tblastn_four_human_vs_rhodopsin.html Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/tblastn_four_human_vs_rhodopsin.html Fri Mar 14 07:40:46 2014 -0400
b
@@ -3,7 +3,7 @@
 <BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099">
 <PRE>
 
-<b>TBLASTN 2.2.28+</b>
+<b>TBLASTN 2.2.29+</b>
 
 
 <b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44
@@ -461,8 +461,8 @@
 
 Length=1047
 
-<script src="blastResult.js"></script>
- Score =  732 bits (1689),  Expect = 0.0, Method: Compositional matrix adjust.
+
+ Score =   732 bits (1689),  Expect = 0.0, Method: Compositional matrix adjust.
  Identities = 336/348 (97%), Positives = 343/348 (99%), Gaps = 0/348 (0%)
  Frame = +1
 
@@ -511,8 +511,8 @@
 
 Length=1574
 
-<script src="blastResult.js"></script>
- Score =  646 bits (1489),  Expect = 0.0, Method: Compositional matrix adjust.
+
+ Score =   646 bits (1489),  Expect = 0.0, Method: Compositional matrix adjust.
  Identities = 290/342 (85%), Positives = 320/342 (94%), Gaps = 1/342 (0%)
  Frame = +3
 
@@ -561,8 +561,8 @@
 
 Length=4301
 
-<script src="blastResult.js"></script>
- Score =  151 bits (342),  Expect(2) = 1e-72, Method: Compositional matrix adjust.
+
+ Score =   151 bits (342),  Expect(2) = 1e-72, Method: Compositional matrix adjust.
  Identities = 69/74 (93%), Positives = 73/74 (99%), Gaps = 0/74 (0%)
  Frame = +3
 
@@ -575,7 +575,7 @@
 Sbjct  3327  SIYNPVIYIMMNKQ  3368
 
 
- Score =  126 bits (284),  Expect(2) = 1e-72, Method: Compositional matrix adjust.
+ Score =   126 bits (284),  Expect(2) = 1e-72, Method: Compositional matrix adjust.
  Identities = 54/59 (92%), Positives = 57/59 (97%), Gaps = 0/59 (0%)
  Frame = +2
 
@@ -584,7 +584,7 @@
 Sbjct  2855  RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS  3031
 
 
- Score =  229 bits (523),  Expect = 9e-67, Method: Compositional matrix adjust.
+ Score =   229 bits (523),  Expect = 9e-67, Method: Compositional matrix adjust.
  Identities = 107/111 (96%), Positives = 109/111 (98%), Gaps = 0/111 (0%)
  Frame = +1
 
@@ -597,7 +597,7 @@
 Sbjct  181  PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG  333
 
 
- Score =  122 bits (276),  Expect = 1e-32, Method: Compositional matrix adjust.
+ Score =   122 bits (276),  Expect = 1e-32, Method: Compositional matrix adjust.
  Identities = 55/59 (93%), Positives = 56/59 (95%), Gaps = 0/59 (0%)
  Frame = +3
 
@@ -635,8 +635,8 @@
 
 Length=983
 
-<script src="blastResult.js"></script>
- Score =  658 bits (1517),  Expect = 0.0, Method: Compositional matrix adjust.
+
+ Score =   658 bits (1517),  Expect = 0.0, Method: Compositional matrix adjust.
  Identities = 310/326 (95%), Positives = 322/326 (99%), Gaps = 0/326 (0%)
  Frame = +1
 
@@ -685,8 +685,8 @@
 
 Length=1047
 
-<script src="blastResult.js"></script>
- Score =  711 bits (1640),  Expect = 0.0, Method: Compositional matrix adjust.
+
+ Score =   711 bits (1640),  Expect = 0.0, Method: Compositional matrix adjust.
  Identities = 325/348 (93%), Positives = 337/348 (97%), Gaps = 0/348 (0%)
  Frame = +1
 
@@ -735,8 +735,8 @@
 
 Length=1344
 
-<script src="blastResult.js"></script>
- Score =  626 bits (1444),  Expect = 0.0, Method: Compositional matrix adjust.
+
+ Score =   626 bits (1444),  Expect = 0.0, Method: Compositional matrix adjust.
  Identities = 281/342 (82%), Positives = 311/342 (91%), Gaps = 1/342 (0%)
  Frame = +2
 
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/tblastn_four_human_vs_rhodopsin.tabular
--- a/test-data/tblastn_four_human_vs_rhodopsin.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/tblastn_four_human_vs_rhodopsin.tabular Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,10 +1,10 @@
-sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0  732
-sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0  646
-sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72  151
-sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72  126
-sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67  229
-sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32  122
+sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.55 348 12 0 1 348 1 1044 0.0   732
+sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.80 342 51 1 1 341 42 1067 0.0   646
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.24 74 5 0 239 312 3147 3368 1e-72   151
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.53 59 5 0 177 235 2855 3031 1e-72   126
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.40 111 4 0 11 121 1 333 9e-67   229
+sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.22 59 4 0 119 177 1404 1580 1e-32   122
 sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.46 26 3 0 312 337 4222 4299 2e-12 57.7
-sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0  658
-sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0  711
-sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0  626
+sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.09 326 16 0 11 336 1 978 0.0   658
+sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.39 348 23 0 1 348 1 1044 0.0   711
+sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.16 342 60 1 1 341 23 1048 0.0   626
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/tblastn_four_human_vs_rhodopsin.xml
--- a/test-data/tblastn_four_human_vs_rhodopsin.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/tblastn_four_human_vs_rhodopsin.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -2,7 +2,7 @@
 <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
 <BlastOutput>
   <BlastOutput_program>tblastn</BlastOutput_program>
-  <BlastOutput_version>TBLASTN 2.2.28+</BlastOutput_version>
+  <BlastOutput_version>TBLASTN 2.2.29+</BlastOutput_version>
   <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
   <BlastOutput_db></BlastOutput_db>
   <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/tblastn_four_human_vs_rhodopsin_ext.tabular
--- a/test-data/tblastn_four_human_vs_rhodopsin_ext.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/tblastn_four_human_vs_rhodopsin_ext.tabular Fri Mar 14 07:40:46 2014 -0400
b
b'@@ -1,10 +1,10 @@\n-sp|P08100|OPSD_HUMAN\tgi|57163782|ref|NM_001009242.1|\t96.55\t348\t12\t0\t1\t348\t1\t1044\t0.0\t 732\tgi|57163782|ref|NM_001009242.1|\t1689\t336\t343\t0\t98.56\t0\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\t348\t1047\tN/A\n-sp|P08100|OPSD_HUMAN\tgi|2734705|gb|U59921.1|BBU59921\t84.80\t342\t51\t1\t1\t341\t42\t1067\t0.0\t 646\tgi|2734705|gb|U59921.1|BBU59921\t1489\t290\t320\t1\t93.57\t0\t3\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE\t348\t1574\tN/A\n-sp|P08100|OPSD_HUMAN\tgi|283855845|gb|GQ290303.1|\t93.24\t74\t5\t0\t239\t312\t3147\t3368\t1e-72\t 
151\tgi|283855845|gb|GQ290303.1|\t342\t69\t73\t0\t98.65\t0\t3\tESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ\tESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ\t348\t4301\tN/A\n-sp|P08100|OPSD_HUMAN\tgi|283855845|gb|GQ290303.1|\t91.53\t59\t5\t0\t177\t235\t2855\t3031\t1e-72\t 126\tgi|283855845|gb|GQ290303.1|\t284\t54\t57\t0\t96.61\t0\t2\tRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA\tRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS\t348\t4301\tN/A\n-sp|P08100|OPSD_HUMAN\tgi|283855845|gb|GQ290303.1|\t96.40\t111\t4\t0\t11\t121\t1\t333\t9e-67\t 229\tgi|283855845|gb|GQ290303.1|\t523\t107\t109\t0\t98.20\t0\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG\t348\t4301\tN/A\n-sp|P08100|OPSD_HUMAN\tgi|283855845|gb|GQ290303.1|\t93.22\t59\t4\t0\t119\t177\t1404\t1580\t1e-32\t 122\tgi|283855845|gb|GQ290303.1|\t276\t55\t56\t0\t94.92\t0\t3\tLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR\tLAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR\t348\t4301\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|57163782|ref|NM_001009242.1|\t96.55\t348\t12\t0\t1\t348\t1\t1044\t0.0\t  
732\tgi|57163782|ref|NM_001009242.1|\t1689\t336\t343\t0\t98.56\t0\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\t348\t1047\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|2734705|gb|U59921.1|BBU59921\t84.80\t342\t51\t1\t1\t341\t42\t1067\t0.0\t  646\tgi|2734705|gb|U59921.1|BBU59921\t1489\t290\t320\t1\t93.57\t0\t3\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLA'..b'PINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\t348\t1047\tN/A\n-sp|P08100|OPSD_HUMAN\tgi|12583664|dbj|AB043817.1|\t82.16\t342\t60\t1\t1\t341\t23\t1048\t0.0\t 
626\tgi|12583664|dbj|AB043817.1|\t1444\t281\t311\t1\t90.94\t0\t2\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE\t348\t1344\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|283855822|gb|GQ290312.1|\t95.09\t326\t16\t0\t11\t336\t1\t978\t0.0\t  658\tgi|283855822|gb|GQ290312.1|\t1517\t310\t322\t0\t98.77\t0\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT\t348\t983\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|18148870|dbj|AB062417.1|\t93.39\t348\t23\t0\t1\t348\t1\t1044\t0.0\t  
711\tgi|18148870|dbj|AB062417.1|\t1640\t325\t337\t0\t96.84\t0\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\t348\t1047\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|12583664|dbj|AB043817.1|\t82.16\t342\t60\t1\t1\t341\t23\t1048\t0.0\t  626\tgi|12583664|dbj|AB043817.1|\t1444\t281\t311\t1\t90.94\t0\t2\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE\t348\t1344\tN/A\n'
b
diff -r 6560192c5098 -r 623f727cdff1 test-data/tblastx_rhodopsin_vs_three_human.tabular
--- a/test-data/tblastx_rhodopsin_vs_three_human.tabular Tue Jan 21 13:37:01 2014 -0500
+++ b/test-data/tblastx_rhodopsin_vs_three_human.tabular Fri Mar 14 07:40:46 2014 -0400
b
b'@@ -1,57 +1,57 @@\n-gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t97.39\t230\t6\t0\t1\t690\t88\t777\t0.0\t 559\n-gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t94.12\t102\t6\t0\t742\t1047\t829\t1134\t0.0\t 236\n-gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t91.22\t148\t13\t0\t1046\t603\t1133\t690\t0.0\t 308\n-gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t94.32\t88\t5\t0\t566\t303\t653\t390\t0.0\t 207\n-gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t96.34\t82\t3\t0\t248\t3\t335\t90\t0.0\t 182\n-gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t83.33\t204\t34\t0\t18\t629\t105\t716\t4e-158\t 404\n-gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t75.28\t89\t22\t0\t780\t1046\t867\t1133\t4e-158\t 161\n-gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t81.28\t203\t38\t0\t609\t1\t696\t88\t5e-153\t 360\n-gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t80.60\t67\t13\t0\t916\t716\t1003\t803\t5e-153\t 135\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t97.39\t230\t6\t0\t1\t690\t88\t777\t0.0\t  559\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t94.12\t102\t6\t0\t742\t1047\t829\t1134\t0.0\t  236\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t91.22\t148\t13\t0\t1046\t603\t1133\t690\t0.0\t  308\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t94.32\t88\t5\t0\t566\t303\t653\t390\t0.0\t  207\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t96.34\t82\t3\t0\t248\t3\t335\t90\t0.0\t  182\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t83.33\t204\t34\t0\t18\t629\t105\t716\t4e-158\t  404\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t75.28\t89\t22\t0\t780\t1046\t867\t1133\t4e-158\t  161\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t81.28\t203\t38\t0\t609\t1\t696\t88\t5e-153\t  360\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t80.60\t67\t13\t0\t916\t716\t1003\t803\t5e-153\t  135\n 
gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t70.27\t37\t11\t0\t1047\t937\t1134\t1024\t5e-153\t64.2\n gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t100.00\t7\t0\t0\t646\t626\t733\t713\t5e-153\t24.0\n-gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t89.23\t65\t7\t0\t460\t266\t547\t353\t4e-105\t 167\n-gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t89.58\t48\t5\t0\t184\t41\t271\t128\t4e-105\t 104\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t89.23\t65\t7\t0\t460\t266\t547\t353\t4e-105\t  167\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t89.58\t48\t5\t0\t184\t41\t271\t128\t4e-105\t  104\n gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t77.78\t45\t10\t0\t882\t748\t969\t835\t4e-105\t93.9\n gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t67.86\t28\t9\t0\t1045\t962\t1132\t1049\t4e-105\t51.9\n gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t59.09\t22\t9\t0\t586\t521\t673\t608\t4e-105\t33.1\n-gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t81.40\t86\t16\t0\t296\t553\t383\t640\t2e-87\t 185\n+gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t81.40\t86\t16\t0\t296\t553\t383\t640\t2e-87\t  185\n gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t84.38\t32\t5\t0\t11\t106\t98\t193\t2e-87\t74.8\n gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t71.43\t35\t10\t0\t941\t1045\t1028\t1132\t2e-87\t61.6\n gi|57163782|ref|NM_001009242.1|\tENA|BC112106|BC112106.1\t94.44\t18\t1\t0\t794\t847\t881\t934\t2e-87\t50.1\n-gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t83.61\t238\t39\t0\t18\t731\t64\t777\t0.0\t 507\n-gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t82.35\t85\t15\t0\t783\t1037\t829\t1083\t0.0\t 188\n-gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t70.96\t303\t88\t0\t925\t17\t971\t63\t2e-130\t 435\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t83.61\t238\t39\t0\t18\t731\t64\t777\t0.0\t  
507\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t82.35\t85\t15\t0\t783\t1037\t829\t1083\t0.0\t  188\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t70.96\t303\t88\t0\t925\t17\t971\t63\t2e-130\t  435\n gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t72.22\t18\t5\t0\t1027\t974\t1073\t1020\t2e-130\t35.0\n-gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t55.32\t188\t84\t0\t605\t42\t651\t88\t7e-89\t 245\n+gi|2734705|gb|U59921.1|BBU59921\tENA|BC112106|BC112106.1\t55.3'..b'-87,16 +87,16 @@\n gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t81.40\t43\t8\t0\t404\t532\t521\t649\t4e-48\t47.3\n gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t88.89\t18\t2\t0\t764\t817\t881\t934\t4e-48\t44.6\n gi|283855822|gb|GQ290312.1|\tENA|BC112106|BC112106.1\t87.50\t8\t1\t0\t935\t958\t1052\t1075\t4e-48\t21.7\n-gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t93.91\t230\t14\t0\t1\t690\t88\t777\t0.0\t 538\n-gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t91.18\t102\t9\t0\t742\t1047\t829\t1134\t0.0\t 233\n-gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t88.83\t188\t21\t0\t566\t3\t653\t90\t0.0\t 394\n-gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t84.06\t138\t22\t0\t1046\t633\t1133\t720\t0.0\t 260\n-gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t67.11\t228\t75\t0\t684\t1\t771\t88\t7e-132\t 333\n-gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t67.27\t110\t36\t0\t1045\t716\t1132\t803\t7e-132\t 141\n-gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t70.20\t151\t45\t0\t3\t455\t90\t542\t1e-128\t 236\n-gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t64.04\t89\t32\t0\t780\t1046\t867\t1133\t1e-128\t 136\n-gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t66.22\t74\t25\t0\t510\t731\t597\t818\t1e-128\t 111\n-gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t66.04\t106\t36\t0\t242\t559\t329\t646\t2e-58\t 
161\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t93.91\t230\t14\t0\t1\t690\t88\t777\t0.0\t  538\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t91.18\t102\t9\t0\t742\t1047\t829\t1134\t0.0\t  233\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t88.83\t188\t21\t0\t566\t3\t653\t90\t0.0\t  394\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t84.06\t138\t22\t0\t1046\t633\t1133\t720\t0.0\t  260\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t67.11\t228\t75\t0\t684\t1\t771\t88\t7e-132\t  333\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t67.27\t110\t36\t0\t1045\t716\t1132\t803\t7e-132\t  141\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t70.20\t151\t45\t0\t3\t455\t90\t542\t1e-128\t  236\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t64.04\t89\t32\t0\t780\t1046\t867\t1133\t1e-128\t  136\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t66.22\t74\t25\t0\t510\t731\t597\t818\t1e-128\t  111\n+gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t66.04\t106\t36\t0\t242\t559\t329\t646\t2e-58\t  161\n gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t85.71\t21\t3\t0\t92\t154\t179\t241\t2e-58\t53.8\n gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t73.68\t19\t5\t0\t791\t847\t878\t934\t2e-58\t39.1\n gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t61.29\t62\t24\t0\t424\t239\t511\t326\t4e-55\t81.3\n@@ -104,11 +104,11 @@\n gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t65.71\t35\t12\t0\t882\t778\t969\t865\t4e-55\t56.3\n gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t58.14\t43\t18\t0\t649\t521\t736\t608\t4e-55\t50.6\n gi|18148870|dbj|AB062417.1|\tENA|BC112106|BC112106.1\t66.67\t12\t4\t0\t972\t937\t1059\t1024\t4e-55\t23.9\n-gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t82.13\t235\t42\t0\t11\t715\t76\t780\t0.0\t 498\n-gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t78.31\t83\t18\t0\t770\t1018\t835\t1083\t0.0\t 
177\n-gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t72.29\t332\t92\t0\t1017\t22\t1082\t87\t1e-150\t 516\n-gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t48.30\t147\t76\t0\t712\t272\t777\t337\t2e-98\t 169\n-gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t54.17\t72\t33\t0\t1030\t815\t1095\t880\t2e-98\t 103\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t82.13\t235\t42\t0\t11\t715\t76\t780\t0.0\t  498\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t78.31\t83\t18\t0\t770\t1018\t835\t1083\t0.0\t  177\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t72.29\t332\t92\t0\t1017\t22\t1082\t87\t1e-150\t  516\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t48.30\t147\t76\t0\t712\t272\t777\t337\t2e-98\t  169\n+gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t54.17\t72\t33\t0\t1030\t815\t1095\t880\t2e-98\t  103\n gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t47.83\t69\t36\t0\t220\t14\t285\t79\t2e-98\t83.5\n gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t72.00\t25\t7\t0\t782\t708\t847\t773\t2e-98\t45.1\n gi|12583664|dbj|AB043817.1|\tENA|BC112106|BC112106.1\t56.00\t75\t33\t0\t532\t756\t597\t821\t5e-65\t87.7\n'
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/README.rst
--- a/tools/ncbi_blast_plus/README.rst Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/README.rst Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,9 +1,8 @@
 Galaxy wrappers for NCBI BLAST+ suite
 =====================================
 
-These wrappers are copyright 2010-2013 by Peter Cock, The James Hutton Institute
-(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
-See the licence text below.
+These wrappers are copyright 2010-2013 by Peter Cock (The James Hutton Institute,
+UK) and additional contributors. All rights reserved. See the licence text below.
 
 Currently tested with NCBI BLAST 2.2.28+ (i.e. version 2.2.28 of BLAST+),
 and does not work with the NCBI 'legacy' BLAST suite (e.g. ``blastall``).
@@ -26,17 +25,7 @@
 (``blastxml``) and protein and nucleotide BLAST databases (``blastdbp`` and
 ``blastdbn``).
 
-You must tell Galaxy about any system level BLAST databases using configuration
-files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein
-databases like NR), and blastdb_d.loc (protein domain databases like CDD or
-SMART) which are located in the tool-data/ folder. Sample files are included
-which explain the tab-based format to use.
-
-You can download the NCBI provided databases as tar-balls from here:
-
-* ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR)
-* ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD)
-
+See the configuration notes below.
 
 Manual Installation
 ===================
@@ -79,6 +68,39 @@
 
     ./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools
 
+Configuration
+=============
+
+You must tell Galaxy about any system level BLAST databases using configuration
+files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein
+databases like NR), and blastdb_d.loc (protein domain databases like CDD or
+SMART) which are located in the tool-data/ folder. Sample files are included
+which explain the tab-based format to use.
+
+You can download the NCBI provided databases as tar-balls from here:
+
+* ftp://ftp.ncbi.nlm.nih.gov/blast/db/ (nucleotide and protein databases like NR)
+* ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ (domain databases like CDD)
+
+If using the optional taxonomy columns, you will also need to download the
+NCBI taxonomy files (``taxdb.btd`` and ``taxdb.bti`` from ``taxdb.tar.gz`` on
+the BLAST database FTP site). Currently explicit version tracking of the
+taxonomy is not supported, and in order to use this you must set the
+``$BLASTDB`` environment variable to include the path where you unzipped the
+taxonomy files. If this is not done, the taxonomy columns like species name
+will appear as ``N/A`` in the tabular output.
+
+The BLAST+ binaries support multi-threaded operation, which is handled via the
+``$GALAXY_SLOTS`` environment variable. This should be set automatically by Galaxy
+via your job runner settings, which allows you to (for example) allocate four
+cores to each BLAST job.
+
+In addition, the BLAST+ wrappers also support high level parallelism by task
+splitting if ``use_tasked_jobs = True`` is enabled in your ``universe_wsgi.ini``
+configuration file. Essentially, the FASTA input query files are broken up into
+batches of 1000 sequences, a separate BLAST child job is run for each chunk,
+and then the BLAST output files are merged (in order). This is transparent
+for the end user.
 
 History
 =======
@@ -106,7 +128,7 @@
           (all too often our users were having to re-run searches just to
           get one of the missing columns like query or subject length)
 v0.0.18 - Defensive quoting of filenames in case of spaces (where possible,
-          BLAST+ handling of some mult-file arguments is problematic).
+          BLAST+ handling of some multi-file arguments is problematic).
 v0.0.19 - Added wrappers for rpsblast and rpstblastn, and new blastdb_d.loc
           for the domain databases they use (e.g. CDD, PFAM or SMART).
         - Correct case of exception regular expression (for error handling
@@ -122,20 +144,30 @@
         - Development moved to GitHub, https://github.com/peterjc/galaxy_blast
         - Updated citation information (Cock et al. 2013).
 v0.0.21 - Use macros to simplify the XML wrappers.
-        - Added wrapper for dustmasker
-        - Enabled masking for makeblastdb
-        - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes
+        - Added wrapper for dustmasker.
+        - Enabled masking for makeblastdb.
+        - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes
           defined in updated blast_datatypes on Galaxy ToolShed.
-        - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26
-        - Now depends on package_blast_plus_2_2_27 in ToolShed
-v0.0.22 - More use macros to simplify the wrappers
-        - Set number of threads via $GALAXY_SLOTS environment variable
-        - More descriptive default output names
-        - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18)
+        - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26.
+        - Now depends on package_blast_plus_2_2_27 in ToolShed.
+v0.0.22 - More use of macros to simplify the wrappers.
+        - Set number of threads via $GALAXY_SLOTS environment variable.
+        - More descriptive default output names.
+        - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18).
         - Pre-check for duplicate identifiers in makeblastdb wrapper.
-        - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27
-        - Now depends on package_blast_plus_2_2_28 in ToolShed
+        - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27.
+        - Now depends on package_blast_plus_2_2_28 in ToolShed.
         - Extended tabular output includes 'salltitles' as column 25.
+v0.1.00 - Now depends on package_blast_plus_2_2_29 in ToolShed.
+        - Tabular output now includes option to pick specific columns,
+          including previously unavailable taxonomy columns.
+        - BLAST XML to tabular tool supports multiple input files.
+        - More detailed descriptions for BLASTN and BLASTP task option.
+        - Wrappers for segmasker, dustmasker and convert2blastmask.
+        - Supports using maskinfo with makeblastdb wrapper.
+        - Supports setting a taxonomy ID in makeblastdb wrapper.
+        - Subtle changes like new conditional settings will require some old
+          workflows be updated to cope. 
 ======= ======================================================================
 
 
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/blastxml_to_tabular.py
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Fri Mar 14 07:40:46 2014 -0400
[
b'@@ -62,9 +62,11 @@\n """\n import sys\n import re\n+import os\n+from optparse import OptionParser\n \n if "-v" in sys.argv or "--version" in sys.argv:\n-    print "v0.0.22"\n+    print "v0.1.00"\n     sys.exit(0)\n \n if sys.version_info[:2] >= ( 2, 5 ):\n@@ -81,34 +83,55 @@\n     sys.stderr.write("%s\\n" % msg)\n     sys.exit(1)\n \n-#Parse Command Line\n-try:\n-    in_file, out_file, out_fmt = sys.argv[1:]\n-except:\n-    stop_err("Expect 3 arguments: input BLAST XML file, output tabular file, out format (std or ext)")\n+if len(sys.argv) == 4 and sys.argv[3] in ["std", "x22", "ext"]:\n+    #False positive if user really has a BLAST XML file called \'std\' or \'ext\'...\n+    stop_err("ERROR: The script API has changed, sorry.")\n+\n+usage = """usage: %prog [options] blastxml[,...]\n+\n+Convert one (or more) BLAST XML files into a single tabular file.\n \n+The columns option can be \'std\' (standard 12 columns), \'ext\'\n+(extended 25 columns), or a list of BLAST+ column names like\n+\'qseqid,sseqid,pident\' (space or comma separated).\n+"""\n+parser = OptionParser(usage=usage)\n+parser.add_option(\'-o\', \'--output\', dest=\'output\', default=None, help=\'output filename (defaults to stdout)\', metavar="FILE")\n+parser.add_option("-c", "--columns", dest="columns", default=\'std\', help="[std|ext|col1,col2,...] 
standard 12 columns, extended 25 columns, or list of column names")\n+(options, args) = parser.parse_args()\n+\n+colnames = \'qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles\'.split(\',\')\n+\n+if len(args) < 1:\n+    stop_err("ERROR: No BLASTXML input files given; run with --help to see options.")\n+\n+out_fmt = options.columns\n if out_fmt == "std":\n     extended = False\n+    cols = None\n elif out_fmt == "x22":\n     stop_err("Format argument x22 has been replaced with ext (extended 25 columns)")\n elif out_fmt == "ext":\n     extended = True\n+    cols = None\n else:\n-    stop_err("Format argument should be std (12 column) or ext (extended 25 columns), not: %r" % out_fmt)\n-\n+    cols = out_fmt.replace(" ", ",").split(",") #Allow space or comma separated\n+    #Remove any blank entries due to trailing comma,\n+    #or annoying "None" dummy value from Galaxy if no columns\n+    cols = [c for c in cols if c and c != "None"]\n+    extra = set(cols).difference(colnames)\n+    if extra:\n+        stop_err("These are not recognised column names: %s" % ",".join(sorted(extra)))\n+    del extra\n+    assert set(colnames).issuperset(cols), cols\n+    if not cols:\n+        stop_err("No columns selected!")\n+    extended = max(colnames.index(c) for c in cols) >= 12 #Do we need any higher columns?\n+del out_fmt\n \n-# get an iterable\n-try: \n-    context = ElementTree.iterparse(in_file, events=("start", "end"))\n-except:\n-    stop_err("Invalid data format.")\n-# turn it into an iterator\n-context = iter(context)\n-# get the root element\n-try:\n-    event, root = context.next()\n-except:\n-    stop_err( "Invalid data format." 
)\n+for in_file in args:\n+    if not os.path.isfile(in_file):\n+        stop_err("Input BLAST XML file not found: %s" % in_file)\n \n \n re_default_query_id = re.compile("^Query_\\d+$")\n@@ -122,156 +145,187 @@\n assert not re_default_subject_id.match("TheSubject_1")\n \n \n-outfile = open(out_file, \'w\')\n-blast_program = None\n-for event, elem in context:\n-    if event == "end" and elem.tag == "BlastOutput_program":\n-        blast_program = elem.text\n-    # for every <Iteration> tag\n-    if event == "end" and elem.tag == "Iteration":\n-        #Expecting either this, from BLAST 2.2.25+ using FASTA vs FASTA\n-        # <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n-        # <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n-        # <Iteration_query-len>406</Iteration_query-len>\n-        # <Iteration_hits></Iteration_hits>\n-        #\n-        #Or, from BLAST 2.2.24+ run online\n-        # <Iteration_query-ID>'..b'ngth))\n+                        qframe = hsp.findtext("Hsp_query-frame")\n+                        sframe = hsp.findtext("Hsp_hit-frame")\n+                        if blast_program == "blastp":\n+                            #Probably a bug in BLASTP that they use 0 or 1 depending on format\n+                            if qframe == "0": qframe = "1"\n+                            if sframe == "0": sframe = "1"\n+                        slen = int(hit.findtext("Hit_len"))\n+                        values.extend([sallseqid,\n+                                       hsp.findtext("Hsp_score"), #score,\n+                                       nident,\n+                                       positive,\n+                                       hsp.findtext("Hsp_gaps"), #gaps,\n+                                       ppos,\n+                                       qframe,\n+                                       sframe,\n+                                       #NOTE - 
for blastp, XML shows original seq, tabular uses XXX masking\n+                                       q_seq,\n+                                       h_seq,\n+                                       str(qlen),\n+                                       str(slen),\n+                                       salltitles,\n+                                       ])\n+                    if cols:\n+                        #Only a subset of the columns are needed\n+                        values = [values[colnames.index(c)] for c in cols]\n+                    #print "\\t".join(values) \n+                    outfile.write("\\t".join(values) + "\\n")\n+            # prevents ElementTree from growing large datastructure\n+            root.clear()\n+            elem.clear()\n \n-                if extended:\n-                    try:\n-                        sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(" >"))\n-                        salltitles = "<>".join(name.split(None,1)[1] for name in hit_def.split(" >"))\n-                    except IndexError as e:\n-                        stop_err("Problem splitting multuple hits?\\n%r\\n--> %s" % (hit_def, e))\n-                    #print hit_def, "-->", sallseqid\n-                    positive = hsp.findtext("Hsp_positive")\n-                    ppos = "%0.2f" % (100*float(positive)/float(length))\n-                    qframe = hsp.findtext("Hsp_query-frame")\n-                    sframe = hsp.findtext("Hsp_hit-frame")\n-                    if blast_program == "blastp":\n-                        #Probably a bug in BLASTP that they use 0 or 1 depending on format\n-                        if qframe == "0": qframe = "1"\n-                        if sframe == "0": sframe = "1"\n-                    slen = int(hit.findtext("Hit_len"))\n-                    values.extend([sallseqid,\n-                                   hsp.findtext("Hsp_score"), #score,\n-                                   nident,\n-                 
                  positive,\n-                                   hsp.findtext("Hsp_gaps"), #gaps,\n-                                   ppos,\n-                                   qframe,\n-                                   sframe,\n-                                   #NOTE - for blastp, XML shows original seq, tabular uses XXX masking\n-                                   q_seq,\n-                                   h_seq,\n-                                   str(qlen),\n-                                   str(slen),\n-                                   salltitles,\n-                                   ])\n-                #print "\\t".join(values) \n-                outfile.write("\\t".join(values) + "\\n")\n-        # prevents ElementTree from growing large datastructure\n-        root.clear()\n-        elem.clear()\n-outfile.close()\n+\n+if options.output:\n+    outfile = open(options.output, "w")\n+else:\n+    outfile = sys.stdout\n+\n+for in_file in args:\n+    blast_program = None\n+    convert(in_file, outfile)\n+\n+if options.output:\n+    outfile.close()\n+else:\n+    #Using stdout\n+    pass\n+\n'
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/blastxml_to_tabular.xml
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,8 +1,15 @@
-<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.0.22">
+<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.1.00">
     <description>Convert BLAST XML output to tabular</description>
     <version_command interpreter="python">blastxml_to_tabular.py --version</version_command>
     <command interpreter="python">
-      blastxml_to_tabular.py $blastxml_file $tabular_file $out_format
+blastxml_to_tabular.py -o "$tabular_file"
+#if $output.out_format == "cols":
+#set cols = (str($output.std_cols)+","+str($output.ext_cols)).replace("None", " ").replace(",,", ",").replace(",", " ")
+-c "$cols"
+#else
+-c "$output.out_format"
+#end if
+#for i in $blastxml_file#${i} #end for#
     </command>
     <stdio>
         <!-- Anything other than zero is an error -->
@@ -10,14 +17,50 @@
         <exit_code range=":-1" />
     </stdio>
     <inputs>
-        <param name="blastxml_file" type="data" format="blastxml" label="BLAST results as XML"/> 
-        <param name="out_format" type="select" label="Output format">
-            <option value="std">Tabular (standard 12 columns)</option>
-            <option value="ext" selected="True">Tabular (extended 24 columns)</option>
-        </param>
+        <param name="blastxml_file" type="data" format="blastxml" multiple="true" label="BLAST results as XML"/>
+        <conditional name="output">
+          <param name="out_format" type="select" label="Output format">
+            <option value="std" selected="True">Tabular (standard 12 columns)</option>
+            <option value="ext">Tabular (extended 25 columns)</option>
+            <option value="cols">Tabular (select columns to output)</option>
+          </param>
+          <when value="std"/>
+          <when value="ext"/>
+          <when value="cols">
+            <param name="std_cols" type="select" multiple="true" display="checkboxes" label="Standard columns">
+              <option selected="true" value="qseqid">qseqid = Query Seq-id (ID of your sequence)</option>
+              <option selected="true" value="sseqid">sseqid = Subject Seq-id (ID of the database hit)</option>
+              <option selected="true" value="pident">pident = Percentage of identical matches</option>
+              <option selected="true" value="length">length = Alignment length</option>
+              <option selected="true" value="mismatch">mismatch = Number of mismatches</option>
+              <option selected="true" value="gapopen">gapopen = Number of gap openings</option>
+              <option selected="true" value="qstart">qstart = Start of alignment in query</option>
+              <option selected="true" value="qend">qend = End of alignment in query</option>
+              <option selected="true" value="sstart">sstart = Start of alignment in subject (database hit)</option>
+              <option selected="true" value="send">send = End of alignment in subject (database hit)</option>
+              <option selected="true" value="evalue">evalue = Expectation value (E-value)</option>
+              <option selected="true" value="bitscore">bitscore = Bit score</option>
+            </param>
+            <param name="ext_cols" type="select" multiple="true" display="checkboxes" label="Extended columns">
+              <option value="sallseqid">sallseqid = All subject Seq-id(s), separated by a ';'</option>
+              <option value="score">score = Raw score</option>
+              <option value="nident">nident = Number of identical matches</option>
+              <option value="positive">positive = Number of positive-scoring matches</option>
+              <option value="gaps">gaps = Total number of gaps</option>
+              <option value="ppos">ppos = Percentage of positive-scoring matches</option>
+              <option value="qframe">qframe = Query frame</option>
+              <option value="sframe">sframe = Subject frame</option>
+              <option value="qseq">qseq = Aligned part of query sequence</option>
+              <option value="sseq">sseq = Aligned part of subject sequence</option>
+              <option value="qlen">qlen = Query sequence length</option>
+              <option value="slen">slen = Subject sequence length</option>
+              <option value="salltitles">salltitles = All subject title(s), separated by a '&lt;&gt;'</option>
+            </param>
+          </when>
+        </conditional>
     </inputs>
     <outputs>
-        <data name="tabular_file" format="tabular" label="$blastxml_file.display_name (as tabular)" />
+        <data name="tabular_file" format="tabular" label="$on_string (as tabular)" />
     </outputs>
     <requirements>
     </requirements>
@@ -80,6 +123,19 @@
             <param name="out_format" value="ext" />
             <output name="tabular_file" file="blastn_arabidopsis.extended.tabular" ftype="tabular" />
         </test>
+        <!-- there are some harmless white space differences in our conversion to the BLAST+ output here: -->
+        <test>
+            <param name="blastxml_file" value="blastn_rhodopsin_vs_three_human.xml" ftype="blastxml" />
+            <param name="out_format" value="std" />
+            <output name="tabular_file" file="blastn_rhodopsin_vs_three_human_converted.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="blastxml_file" value="blastn_rhodopsin_vs_three_human.xml" ftype="blastxml" />
+            <param name="out_format" value="cols" />
+            <param name="std_cols" value="qseqid,sseqid,pident" />
+            <param name="ext_cols" value="qlen,slen" />
+            <output name="tabular_file" file="blastn_rhodopsin_vs_three_human.columns.tabular" ftype="tabular" />
+        </test>
     </tests>
     <help>
     
@@ -120,7 +176,7 @@
 ====== ============= ===========================================
 Column NCBI name     Description
 ------ ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by ';'
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
     14 score         Raw score
     15 nident        Number of identical matches
     16 positive      Number of positive-scoring matches
@@ -132,7 +188,7 @@
     22 sseq          Aligned part of subject sequence
     23 qlen          Query sequence length
     24 slen          Subject sequence length
-    25 salltitles    All subject title(s), separated by '&lt;&gt;'
+    25 salltitles    All subject title(s), separated by a '&lt;&gt;'
 ====== ============= ===========================================
 
 Beware that the XML file (and thus the conversion) and the tabular output
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.22">
+<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.1.00">
     <description>Show BLAST database information from blastdbcmd</description>
     <macros>
         <token name="@BINARY@">blastdbcmd</token>
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.22">
+<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.1.00">
     <description>Extract sequence(s) from BLAST database</description>
     <macros>
         <token name="@BINARY@">blastdbcmd</token>
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.22">
+<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.1.00">
     <description>Search nucleotide database with nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
@@ -36,14 +36,16 @@
         <expand macro="input_conditional_nucleotide_db" />
 
         <param name="blast_type" type="select" display="radio" label="Type of BLAST">
-            <option value="megablast">megablast</option>
-            <option value="blastn">blastn</option>
-            <option value="blastn-short">blastn-short</option>
-            <option value="dc-megablast">dc-megablast</option>
+            <option value="megablast">megablast - Traditional megablast used to find very similar (e.g., intraspecies or closely related species) sequences</option>
+            <option value="blastn">blastn - Traditional BLASTN requiring an exact match of 11, for somewhat similar sequences</option>
+            <option value="blastn-short">blastn-short - BLASTN program optimized for sequences shorter than 50 bases</option>
+            <option value="dc-megablast">dc-megablast - Discontiguous megablast used to find more distant (e.g., interspecies) sequences</option>
             <!-- Using BLAST 2.2.24+ this gives an error:
             BLAST engine error: Program type 'vecscreen' not supported
             <option value="vecscreen">vecscreen</option>
+            In any case, vecscreen has gone in BLAST+ 2.2.28
             -->
+            <!-- BLAST+ 2.2.28 also offers rmblastn -->
         </param>
         <expand macro="input_evalue" />
         <expand macro="input_out_format" />
@@ -63,7 +65,7 @@
         </expand>
     </inputs>
     <outputs>
-        <data name="output1" format="tabular" label="${blast_type.value_label} $query.name vs @ON_DB_SUBJECT@">
+        <data name="output1" format="tabular" label="${blast_type.value} $query.name vs @ON_DB_SUBJECT@">
             <expand macro="output_change_format" />
         </data>
     </outputs>
@@ -74,10 +76,32 @@
             <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
             <param name="database" value="" />
             <param name="evalue_cutoff" value="1e-40" />
+            <param name="out_format" value="5" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastn_rhodopsin_vs_three_human.xml" ftype="blastxml" />
+        </test>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-40" />
             <param name="out_format" value="6" />
             <param name="adv_opts_selector" value="basic" />
             <output name="output1" file="blastn_rhodopsin_vs_three_human.tabular" ftype="tabular" />
         </test>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-40" />
+            <param name="out_format" value="cols" />
+            <param name="std_cols" value="qseqid,sseqid,pident" />
+            <param name="ext_cols" value="qlen,slen" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastn_rhodopsin_vs_three_human.columns.tabular" ftype="tabular" />
+        </test>
     </tests>
     <help>
     
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.22">
+<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.1.00">
     <description>Search protein database with protein query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />
@@ -34,8 +34,8 @@
         <expand macro="input_conditional_protein_db" />
 
         <param name="blast_type" type="select" display="radio" label="Type of BLAST">
-            <option value="blastp">blastp</option>
-            <option value="blastp-short">blastp-short</option>
+            <option value="blastp">blastp - Traditional BLASTP to compare a protein query to a protein database</option>
+            <option value="blastp-short">blastp-short - BLASTP optimized for queries shorter than 30 residues</option>
         </param>
         <expand macro="input_evalue" />
         <expand macro="input_out_format" />
@@ -55,7 +55,7 @@
         </expand>
     </inputs>
     <outputs>
-        <data name="output1" format="tabular" label="${blast_type.value_label} $query.name vs @ON_DB_SUBJECT@">
+        <data name="output1" format="tabular" label="${blast_type.value} $query.name vs @ON_DB_SUBJECT@">
             <expand macro="output_change_format" />
         </data>
     </outputs>
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.22">
+<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.1.00">
     <description>Search protein database with translated nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
@@ -83,6 +83,21 @@
             <param name="adv_opts_selector" value="basic" />
             <output name="output1" file="blastx_rhodopsin_vs_four_human_ext.tabular" ftype="tabular" />
         </test>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="cols" />
+            <param name="std_cols" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore" />
+            <param name="ext_cols" value="sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles" />
+            <param name="ids_cols" value="qgi,qacc,qaccver,sallseqid,sgi,sallgi,sacc,saccver,sallacc,stitle" />
+            <param name="misc_cols" value="sstrand,frames,btop,qcovs,qcovhsp" />
+            <param name="tax_cols" value="staxids,sscinames,scomnames,sblastnames,sskingdoms" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastx_rhodopsin_vs_four_human_all.tabular" ftype="tabular" />
+        </test>
     </tests>
     <help>
     
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -0,0 +1,87 @@
+<tool id="ncbi_convert2blastmask_wrapper" name="NCBI BLAST+ convert2blastmask" version="0.1.00">
+    <!-- Galaxy wrapper for the BLAST+ convert2blastmask binary: takes a
+         lower-case masked FASTA file and writes its masking information in
+         the format chosen with the "outformat" parameter. -->
+    <description>Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb</description>
+    <macros>
+        <token name="@BINARY@">convert2blastmask</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+convert2blastmask
+-in "$infile"
+-masking_algorithm "$masking_algorithm"
+## Free text supplied by the user: single quoted so the shell performs no
+## expansion on it (the parameter's sanitizer drops single quote characters).
+-masking_options '$masking_options'
+$parse_seqids
+-out "$outfile"
+-outfmt $outformat
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="fasta" label="masked FASTA file"/>
+        <param name="masking_algorithm" type="select" label="Used masking algorithm">
+            <option value="dust">DUST</option>
+            <option value="seg" selected="true">SEG</option>
+            <option value="windowmasker">windowmasker</option>
+            <option value="repeat">repeat</option>
+            <option value="other">other</option>
+        </param>
+        <param name="masking_options" type="text" value="" size="20" label="Masking algorithm options to create the masked input"
+            help="free text to describe the options used to create the masking files. (-masking_options)">
+            <!-- Any printable character is accepted except the single quote,
+                 which would terminate the quoting used on the command line.
+                 Rejected characters are dropped (invalid_char is empty). -->
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;" />
+                </valid>
+            </sanitizer>
+        </param>
+        <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="true" label="Parse Seq-ids in FASTA input" help="(-parse_seqids)" />
+        <param name="outformat" type="select" label="Output format">
+            <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
+            <option value="maskinfo_asn1_text" selected="True">maskinfo ASN.1 text</option>
+            <option value="maskinfo_xml">maskinfo_xml</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- The label is algorithm neutral because the input may have been
+             masked with any of the algorithms offered above, not only SEG. -->
+        <data name="outfile" format="maskinfo-asn1" label="Masking Information File">
+            <change_format>
+                <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" />
+                <!-- maskinfo_asn1_text needs no entry: it is the default
+                     format declared on the data element. -->
+ <!--
+                <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" />
+ -->
+                <when input="outformat" value="maskinfo_xml" format="xml" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="four_human_proteins_masked.fasta" ftype="fasta" />
+            <param name="masking_algorithm" value="seg" />
+            <param name="masking_options" value="window=12; locut=2.2; hicut=2.5" />
+            <param name="parse_seqids" value="True" />
+            <param name="outformat" value="maskinfo_asn1_bin" />
+            <output name="outfile" file="convert2blastmask_four_human_masked.maskinfo-asn1-binary" />
+        </test>
+        <test>
+            <param name="infile" value="four_human_proteins_masked.fasta" ftype="fasta" />
+            <param name="masking_algorithm" value="seg" />
+            <param name="masking_options" value="window=12; locut=2.2; hicut=2.5" />
+            <param name="parse_seqids" value="True" />
+            <param name="outformat" value="maskinfo_asn1_text" />
+            <output name="outfile" file="convert2blastmask_four_human_masked.maskinfo-asn1" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb.
+
+More information about convert2blastmask can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers (a more specific paper covering this wrapper is planned):
+
+@REFERENCES@
+    </help>
+</tool>
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_dustmasker_wrapper" name="NCBI BLAST+ dustmasker" version="0.0.22">
+<tool id="ncbi_dustmasker_wrapper" name="NCBI BLAST+ dustmasker" version="0.1.00">
     <!-- dustmasker wrapper from Edward Kirton and Nicola Soranzo -->
     <description>masks low complexity regions</description>
     <macros>
@@ -27,27 +27,24 @@
         <param name="level" type="integer" value="20" label="DUST level" help="Score threshold for subwindows" />
         <param name="linker" type="integer" value="1" label="DUST linker" help="How close masked intervals should be to get merged together" />
         <param name="outformat" type="select" label="Output format">
-<!-- acclist and maskinfo_xml are listed as possible output formats in
-     "dustmasker -help", but were not recognized by NCBI BLAST up to
-     release 2.2.27+. Fixed in BLAST 2.2.28+.
-     seqloc_* formats are not very useful -->
-<!--            <option value="acclist">acclist</option>-->
+            <!-- seqloc_* formats are not very useful
+                 and what BLAST+ calls 'interval' is not what Galaxy calls interval format
+            -->
             <option value="fasta">FASTA</option>
-            <option value="interval" selected="true">interval</option>
             <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
-            <option value="maskinfo_asn1_text">maskinfo ASN.1 text</option>
-<!--            <option value="maskinfo_xml">maskinfo_xml</option>
-            <option value="seqloc_asn1_bin">seqloc_asn1_bin</option>
-            <option value="seqloc_asn1_text">seqloc_asn1_text</option>
-            <option value="seqloc_xml">seqloc_xml</option>-->
+            <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option>
+            <option value="maskinfo_xml">maskinfo_xml</option>
         </param>
     </inputs>
     <outputs>
-        <data name="outfile" format="interval" label="DUST Masked File">
+        <data name="outfile" format="maskinfo-asn1" label="DUST Masked File">
             <change_format>
                 <when input="outformat" value="fasta" format="fasta" />
                 <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" />
+ <!--
                 <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" />
+ -->
+                <when input="outformat" value="maskinfo_xml" format="xml" />
             </change_format>
         </data>
     </outputs>
@@ -83,13 +80,14 @@
     <help>
 **What it does**
 
-This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST algorithm.
+This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST_ algorithm.
 
 If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool.
 
 More information about dustmasker can be found in the `BLAST Command Line Applications User Manual`_.
 
 .. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+.. _DUST: http://www.ncbi.nlm.nih.gov/pubmed/16796549
 
 **References**
 
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_macros.xml
--- a/tools/ncbi_blast_plus/ncbi_macros.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml Fri Mar 14 07:40:46 2014 -0400
b
b'@@ -1,30 +1,101 @@\n <macros>\n     <xml name="output_change_format">\n         <change_format>\n-            <when input="out_format" value="0" format="txt"/>\n-            <when input="out_format" value="0 -html" format="html"/>\n-            <when input="out_format" value="2" format="txt"/>\n-            <when input="out_format" value="2 -html" format="html"/>\n-            <when input="out_format" value="4" format="txt"/>\n-            <when input="out_format" value="4 -html" format="html"/>\n-            <when input="out_format" value="5" format="blastxml"/>\n+            <when input="output.out_format" value="0" format="txt"/>\n+            <when input="output.out_format" value="0 -html" format="html"/>\n+            <when input="output.out_format" value="2" format="txt"/>\n+            <when input="output.out_format" value="2 -html" format="html"/>\n+            <when input="output.out_format" value="4" format="txt"/>\n+            <when input="output.out_format" value="4 -html" format="html"/>\n+            <when input="output.out_format" value="5" format="blastxml"/>\n         </change_format>\n     </xml>\n     <xml name="input_out_format">\n-        <param name="out_format" type="select" label="Output format">\n-            <option value="6">Tabular (standard 12 columns)</option>\n-            <option value="ext" selected="True">Tabular (extended 25 columns)</option>\n-            <option value="5">BLAST XML</option>\n-            <option value="0">Pairwise text</option>\n-            <option value="0 -html">Pairwise HTML</option>\n-            <option value="2">Query-anchored text</option>\n-            <option value="2 -html">Query-anchored HTML</option>\n-            <option value="4">Flat query-anchored text</option>\n-            <option value="4 -html">Flat query-anchored HTML</option>\n-            <!--\n-            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>\n-            -->\n-        </param>\n+        <conditional 
name="output">\n+            <param name="out_format" type="select" label="Output format">\n+                <option value="6">Tabular (standard 12 columns)</option>\n+                <option value="ext" selected="True">Tabular (extended 25 columns)</option>\n+                <option value="cols">Tabular (select which columns)</option>\n+                <option value="5">BLAST XML</option>\n+                <option value="0">Pairwise text</option>\n+                <option value="0 -html">Pairwise HTML</option>\n+                <option value="2">Query-anchored text</option>\n+                <option value="2 -html">Query-anchored HTML</option>\n+                <option value="4">Flat query-anchored text</option>\n+                <option value="4 -html">Flat query-anchored HTML</option>\n+                <!--\n+                <option value="-outfmt 11">BLAST archive format (ASN.1)</option>\n+                -->\n+            </param>\n+            <when value="6"/>\n+            <when value="ext"/>\n+            <when value="cols">\n+                <param name="std_cols" type="select" multiple="true" display="checkboxes" label="Standard columns">\n+                    <option selected="true" value="qseqid">qseqid = Query Seq-id (ID of your sequence)</option>\n+                    <option selected="true" value="sseqid">sseqid = Subject Seq-id (ID of the database hit)</option>\n+                    <option selected="true" value="pident">pident = Percentage of identical matches</option>\n+                    <option selected="true" value="length">length = Alignment length</option>\n+                    <option selected="true" value="mismatch">mismatch = Number of mismatches</option>\n+                    <option selected="true" value="gapopen">gapopen = Number of gap openings</option>\n+                    <option selected="true" value="qstart">qstart = Start of alignment in query</option>\n+                    <option selected="true" value="qend">qend = End of 
alignment in query</option>\n+                    <option selected="true" '..b'ommon Name(s), separated by a \';\'</option>\n+                    <option value="sblastnames">sblastnames = unique Subject Blast Name(s), separated by a \';\' (in alphabetical order)</option>\n+                    <option value="sskingdoms">sskingdoms = unique Subject Super Kingdom(s), separated by a \';\' (in alphabetical order)</option>\n+                </param>\n+            </when>\n+            <when value="5"/>\n+            <when value="0"/>\n+            <when value="0 -html"/>\n+            <when value="2"/>\n+            <when value="2 -html"/>\n+            <when value="4"/>\n+            <when value="4 -html"/>\n+        </conditional>\n     </xml>\n     <xml name="input_scoring_matrix">\n         <param name="matrix" type="select" label="Scoring matrix">\n@@ -240,7 +311,7 @@\n     <xml name="requirements">\n         <requirements>\n             <requirement type="binary">@BINARY@</requirement>\n-            <requirement type="package" version="2.2.28">blast+</requirement>\n+            <requirement type="package" version="2.2.29">blast+</requirement>\n         </requirements>\n         <version_command>@BINARY@ -version</version_command>\n     </xml>\n@@ -268,10 +339,15 @@\n     </token>\n     <token name="@BLAST_OUTPUT@">-out "$output1"\n ##Set the extended list here so when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n+#if str($output.out_format)=="ext":\n     -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles"\n+#elif str($output.out_format)=="cols"\n+##Pick your own columns. 
Galaxy gives us it comma separated, BLAST+ wants space separated:\n+##TODO - Can we catch the user picking no columns and raise an error here?\n+#set cols = (str($output.std_cols)+","+str($output.ext_cols)+","+str($output.ids_cols)+","+str($output.misc_cols)+","+str($output.tax_cols)).replace("None", "").replace(",,", ",").replace(",", " ").strip()\n+    -outfmt "6 $cols"\n #else:\n-    -outfmt $out_format\n+    -outfmt $output.out_format\n #end if\n     </token>\n     <token name="@ADVANCED_OPTIONS@">$adv_opts.filter_query\n@@ -330,7 +406,7 @@\n ====== ========= ============================================\n \n The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. Most (but not all) of these columns are\n+but this takes longer to calculate. Many commonly used extra columns are\n included by selecting the extended tabular output. The extra columns are\n included *after* the standard 12 columns. This is so that you can write\n workflow filtering steps that accept either the 12 or 25 column tabular\n@@ -339,7 +415,7 @@\n ====== ============= ===========================================\n Column NCBI name     Description\n ------ ------------- -------------------------------------------\n-    13 sallseqid     All subject Seq-id(s), separated by \';\'\n+    13 sallseqid     All subject Seq-id(s), separated by a \';\'\n     14 score         Raw score\n     15 nident        Number of identical matches\n     16 positive      Number of positive-scoring matches\n@@ -351,10 +427,14 @@\n     22 sseq          Aligned part of subject sequence\n     23 qlen          Query sequence length\n     24 slen          Subject sequence length\n-    25 salltitles    All subject title(s), separated by \'&lt;&gt;\'\n+    25 salltitles    All subject title(s), separated by a \'&lt;&gt;\'\n ====== ============= ===========================================\n \n-The third option is BLAST XML output, which is designed to be parsed by\n+The 
third option is to customise the tabular output by selecting which\n+columns you want, from the standard set of 12, the default set of 25,\n+or any of the additional columns BLAST+ offers (including species name).\n+\n+The fourth option is BLAST XML output, which is designed to be parsed by\n another program, and is understood by some Galaxy tools.\n \n You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n'
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_makeblastdb.xml
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Fri Mar 14 07:40:46 2014 -0400
b
b'@@ -1,4 +1,4 @@\n-<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.22">\n+<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.1.00">\n     <description>Make BLAST database</description>\n     <macros>\n         <token name="@BINARY@">makeblastdb</token>\n@@ -8,50 +8,44 @@\n     <command interpreter="python">check_no_duplicates.py\n ##First check for duplicates (since BLAST+ 2.2.28 fails to do so)\n ##and abort (via the ampersand ampersand trick) if any are found.\n-#for $i in $in\n-"${i.file}"\n-#end for\n+#for i in $input_file#"${i}" #end for#\n &amp;&amp;\n makeblastdb -out "${os.path.join($outfile.extra_files_path,\'blastdb\')}"\n $parse_seqids\n $hash_index\n ## Single call to -in with multiple filenames space separated with outer quotes\n ## (presumably any filenames with spaces would be a problem). Note this gives\n-## some extra spaces, e.g. -in " file1 file2 file3  " but BLAST seems happy:\n--in "\n-#for $i in $in\n-${i.file}\n-#end for\n-"\n+## some extra spaces, e.g. 
-in "file1 file2 file3 " but BLAST seems happy:\n+-in "#for i in $input_file#${i} #end for#"\n #if $title:\n -title "$title"\n #else:\n ##Would default to being based on the cryptic Galaxy filenames, which is unhelpful\n -title "BLAST Database"\n #end if\n--dbtype $dbtype \n-#set $mask_string = \'\'\n-#set $sep = \'-mask_data \'\n-#for $i in $mask_data\n-#set $mask_string += $sep + str($i.file)\n-#set $sep = \',\'\n+-dbtype $dbtype\n+## --------------------------------------------------------------------\n+## Masking\n+## --------------------------------------------------------------------\n+## HACK: If no mask files, evaluates as a list with just None in it:\n+## See Trello issue https://trello.com/c/lp5YmA1O\n+#if \' \'.join( map(str, $mask_data_file) ) != \'None\':\n+#for i in $mask_data_file:\n+-mask_data "${i}"\n #end for\n-$mask_string\n-## #set $gi_mask_string = \'\'\n-## #set $sep = \'-gi_mask -gi_mask_name \'\n-## #for $i in $gi_mask\n-## #set $gi_mask_string += $sep + str($i.file)\n-## #set $sep = \',\'\n-## #end for\n-## $gi_mask_string\n-## #if $tax.select == \'id\':\n-## -taxid $tax.id\n-## #else if $tax.select == \'map\':\n-## -taxid_map $tax.map\n-## #end if\n+#end if\n+## --------------------------------------------------------------------\n+## Taxonomy\n+## --------------------------------------------------------------------\n+#if $tax.taxselect == \'id\':\n+-taxid $tax.taxid\n+## TODO - Can we use a tabular file for the taxonomy mapping?\n+## #else if $tax.taxselect == \'map\':\n+## -taxid_map $tax.taxmap\n+#end if\n ## --------------------------------------------------------------------\n ## Capture the stdout log information to the primary file (plain text):\n-&gt;&gt; "$outfile"\n+&gt; "$outfile"\n     </command>\n     <expand macro="stdio" />\n     <inputs>\n@@ -59,47 +53,38 @@\n             <option value="prot">protein</option>\n             <option value="nucl">nucleotide</option>\n         </param>\n-        <!-- TODO Allow merging of 
existing BLAST databases (conditional on the database type)\n+        <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)?\n              NOTE Double check the new database would be self contained first\n-        <repeat name="in" title="BLAST or FASTA Database" min="1">\n-            <param name="file" type="data" format="fasta,blastdbn,blastdbp" label="BLAST or FASTA database" />\n-        </repeat>\n         -->\n-        <!-- TODO Switch this to using <param ... multiple="true" /> instead of <repeat> block? -->\n-        <repeat name="in" title="FASTA file" min="1">\n-            <param name="file" type="data" format="fasta" />\n-        </repeat>\n+        <!-- Note this is a mandatory parameter - default should be most recent FASTA file -->\n+        <param name="input_file" type="data" multiple="true" optional="false" format="fasta" label="Input FASTA files(s)" help="One or more FASTA files" />\n         <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output"'..b'     With and without the masking makes no difference.\n+             With and without the taxid the only real difference is in the *.phr file.\n         -->\n         <test>\n             <param name="dbtype" value="prot" />\n-            <param name="file" value="four_human_proteins.fasta" ftype="fasta" />\n+            <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" />\n+            <param name="title" value="Just 4 human proteins" />\n+            <param name="parse_seqids" value="" />\n+            <param name="hash_index" value="true" />\n+            <output name="out_file" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp">\n+                <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />\n+                <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" 
lines_diff="2" />\n+                <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />\n+                <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" />\n+                <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" />\n+                <extra_files type="file" value="four_human_proteins.fasta.phi" name="blastdb.phi" />\n+                <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" />\n+                <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" />\n+            </output>\n+        </test>\n+        <test>\n+            <param name="dbtype" value="prot" />\n+            <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" />\n             <param name="title" value="Just 4 human proteins" />\n             <param name="parse_seqids" value="" />\n             <param name="hash_index" value="true" />\n-            <output name="out_file" file="four_human_proteins.fasta.log" ftype="blastdbp" lines_diff="6">\n+            <param name="taxselect" value="id" />\n+            <param name="taxid" value="9606" />\n+            <output name="out_file" compare="contains" file="four_human_proteins_taxid.fasta.log.txt" ftype="blastdbp">\n+                <extra_files type="file" value="four_human_proteins_taxid.fasta.phr" name="blastdb.phr" />\n+                <extra_files type="file" value="four_human_proteins_taxid.fasta.pin" name="blastdb.pin" lines_diff="2" />\n+                <extra_files type="file" value="four_human_proteins_taxid.fasta.psq" name="blastdb.psq" />\n+                <extra_files type="file" value="four_human_proteins_taxid.fasta.pog" name="blastdb.pog" />\n+                <extra_files type="file" value="four_human_proteins_taxid.fasta.phd" name="blastdb.phd" />\n+                <extra_files type="file" value="four_human_proteins_taxid.fasta.phi" 
name="blastdb.phi" />\n+                <extra_files type="file" value="four_human_proteins_taxid.fasta.psd" name="blastdb.psd" />\n+                <extra_files type="file" value="four_human_proteins_taxid.fasta.psi" name="blastdb.psi" />\n+            </output>\n+        </test>\n+        <test>\n+            <param name="dbtype" value="prot" />\n+            <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" />\n+            <param name="title" value="Just 4 human proteins" />\n+            <param name="parse_seqids" value="" />\n+            <param name="hash_index" value="true" />\n+            <param name="mask_data_file" value="segmasker_four_human.maskinfo-asn1" ftype="maskinfo-asn1" />\n+            <output name="out_file" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp">\n                 <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />\n                 <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />\n                 <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />\n'
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.0.22">
+<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.1.00">
     <description>Search protein domain database (PSSMs) with protein query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.0.22">
+<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.1.00">
     <description>Search protein domain database (PSSMs) with translated nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -0,0 +1,101 @@
+<tool id="ncbi_segmasker_wrapper" name="NCBI BLAST+ segmasker" version="0.1.00">
+    <!-- Galaxy wrapper for the BLAST+ segmasker binary. The input is either a
+         BLAST database or a FASTA file (see the command template), and the
+         result is written in the format chosen with "outformat". -->
+    <description>low-complexity regions in protein sequences</description>
+    <macros>
+        <token name="@BINARY@">segmasker</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- The template below picks the -in argument from db_opts_selector:
+         "db" uses the path field of the selected database entry, "histdb"
+         uses the "blastdb" prefix inside the history dataset's extra files
+         directory, and anything else falls back to a FASTA file.
+         NOTE(review): the FASTA branch reads $subject without the db_opts
+         prefix used by the other branches; presumably "subject" is defined
+         inside the db_opts conditional by the imported macro - confirm. -->
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+segmasker
+#if $db_opts.db_opts_selector == "db":
+  -in "${db_opts.database.fields.path}" -infmt blastdb
+#elif $db_opts.db_opts_selector == "histdb":
+  -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb
+#else:
+  -in "$subject" -infmt fasta
+#end if
+-out "$outfile"
+-window $window
+-locut $locut
+-hicut $hicut
+-outfmt $outformat
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <expand macro="input_conditional_protein_db" />
+        <!-- The three SEG parameters are passed straight through to the
+             command line options named in their help text. -->
+        <param name="window" type="integer" value="12" label="SEG window length" help="(-window)" />
+        <param name="locut" type="float" value="2.2" label="SEG low cutoff" help="(-locut)" />
+        <param name="hicut" type="float" value="2.5" label="SEG high cutoff" help="(-hicut)" />
+        <param name="outformat" type="select" label="Output format">
+            <!-- seqloc_* formats are not very useful
+                 and what BLAST+ calls 'interval' is not what Galaxy calls interval format
+            -->
+            <option value="fasta">FASTA</option>
+            <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
+            <option value="maskinfo_asn1_text" selected="true">maskinfo ASN.1 text</option>
+            <option value="maskinfo_xml">maskinfo_xml</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- The default datatype (maskinfo-asn1) corresponds to the default
+             "maskinfo_asn1_text" choice, which is why its explicit entry
+             below is commented out; the other choices override the datatype. -->
+        <data name="outfile" format="maskinfo-asn1" label="SEG Masked File">
+            <change_format>
+                <when input="outformat" value="fasta" format="fasta" />
+                <when input="outformat" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" />
+ <!--
+                <when input="outformat" value="maskinfo_asn1_text" format="maskinfo-asn1" />
+ -->
+                <when input="outformat" value="maskinfo_xml" format="xml" />
+            </change_format>
+        </data>
+    </outputs>
+    <!-- Each test runs the same FASTA input with the same SEG settings and
+         differs only in the requested output format (FASTA, binary ASN.1,
+         text ASN.1). The maskinfo_xml format has no test here. -->
+    <tests>
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="window" value="12" />
+            <param name="locut" value="2.2" />
+            <param name="hicut" value="2.5" />
+            <param name="outformat" value="fasta" />
+            <output name="outfile" file="segmasker_four_human.fasta" />
+        </test>
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="window" value="12" />
+            <param name="locut" value="2.2" />
+            <param name="hicut" value="2.5" />
+            <param name="outformat" value="maskinfo_asn1_bin" />
+            <output name="outfile" file="segmasker_four_human.maskinfo-asn1-binary" />
+        </test>
+        <test>
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="window" value="12" />
+            <param name="locut" value="2.2" />
+            <param name="hicut" value="2.5" />
+            <param name="outformat" value="maskinfo_asn1_text" />
+            <output name="outfile" file="segmasker_four_human.maskinfo-asn1" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+This tool identifies and masks out low complexity regions of a protein database (or proteins in FASTA format) by using the SEG_ algorithm.
+
+If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool.
+
+More information about segmasker can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+.. _SEG: http://www.ncbi.nlm.nih.gov/pubmed/8743706
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers (a more specific paper covering this wrapper is planned):
+
+@REFERENCES@
+    </help>
+</tool>
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.22">
+<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.1.00">
     <description>Search translated nucleotide database with protein query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,4 +1,4 @@
-<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.22">
+<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.1.00">
     <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description>
     <!-- If job splitting is enabled, break up the query file into parts -->
     <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>
b
diff -r 6560192c5098 -r 623f727cdff1 tools/ncbi_blast_plus/tool_dependencies.xml
--- a/tools/ncbi_blast_plus/tool_dependencies.xml Tue Jan 21 13:37:01 2014 -0500
+++ b/tools/ncbi_blast_plus/tool_dependencies.xml Fri Mar 14 07:40:46 2014 -0400
b
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="blast+" version="2.2.28">
-        <repository changeset_revision="23b9ba41ad00" name="package_blast_plus_2_2_28" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+    <package name="blast+" version="2.2.29">
+        <repository changeset_revision="a2ec897aac2c" name="package_blast_plus_2_2_29" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>