Repository 'ncbi_blast_plus'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus

Changeset 11:4c4a0da938ff (2013-12-05)
Previous changeset 10:70e7dcbf6573 (2013-09-23) Next changeset 12:6560192c5098 (2014-01-21)
Commit message:
Uploaded v0.0.22, now wraps BLAST+ 2.2.28 allowing extended tabular output to include the hit descriptions as column 25. Supports $GALAXY_SLOTS. Includes more tests and heavy use of macros.
modified:
test-data/blastn_rhodopsin_vs_three_human.tabular
test-data/blastp_four_human_vs_rhodopsin.xml
test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular
test-data/blastp_four_human_vs_rhodopsin_ext.tabular
test-data/blastp_human_vs_pdb_seg_no_converted_ext.tabular
test-data/blastx_rhodopsin_vs_four_human.tabular
test-data/blastx_rhodopsin_vs_four_human.xml
test-data/blastx_rhodopsin_vs_four_human_converted.tabular
test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular
test-data/blastx_rhodopsin_vs_four_human_ext.tabular
test-data/tblastn_four_human_vs_rhodopsin.html
test-data/tblastn_four_human_vs_rhodopsin.xml
test-data/tblastn_four_human_vs_rhodopsin_ext.tabular
tools/ncbi_blast_plus/README.rst
tools/ncbi_blast_plus/blastxml_to_tabular.py
tools/ncbi_blast_plus/blastxml_to_tabular.xml
tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml
tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
tools/ncbi_blast_plus/ncbi_makeblastdb.xml
tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml
tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
tools/ncbi_blast_plus/repository_dependencies.xml
tools/ncbi_blast_plus/tool_dependencies.xml
added:
test-data/blastn_arabidopsis.extended.tabular
test-data/blastn_arabidopsis.standard.tabular
test-data/blastn_arabidopsis.xml
test-data/dustmasker_three_human.fasta
test-data/dustmasker_three_human.maskinfo-asn1
test-data/dustmasker_three_human.maskinfo-asn1-binary
test-data/four_human_proteins.fasta.log
test-data/four_human_proteins.fasta.phd
test-data/four_human_proteins.fasta.phi
test-data/four_human_proteins.fasta.phr
test-data/four_human_proteins.fasta.pin
test-data/four_human_proteins.fasta.pog
test-data/four_human_proteins.fasta.psd
test-data/four_human_proteins.fasta.psi
test-data/four_human_proteins.fasta.psq
tools/ncbi_blast_plus/check_no_duplicates.py
tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
tools/ncbi_blast_plus/ncbi_macros.xml
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastn_arabidopsis.extended.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_arabidopsis.extended.tabular Thu Dec 05 06:55:59 2013 -0500
b
@@ -0,0 +1,1 @@
+chunk_of_plant chrIII 100.00 630 0 0 1 630 4341 4970 0.0 1164 chrIII 630 630 630 0 100.00 1 1 GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT 630 23459830 gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastn_arabidopsis.standard.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_arabidopsis.standard.tabular Thu Dec 05 06:55:59 2013 -0500
b
@@ -0,0 +1,1 @@
+chunk_of_plant chrIII 100.00 630 0 0 1 630 4341 4970 0.0 1164
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastn_arabidopsis.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastn_arabidopsis.xml Thu Dec 05 06:55:59 2013 -0500
b
@@ -0,0 +1,71 @@
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
+<BlastOutput>
+  <BlastOutput_program>blastn</BlastOutput_program>
+  <BlastOutput_version>BLASTN 2.2.28+</BlastOutput_version>
+  <BlastOutput_reference>Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), &quot;A greedy algorithm for aligning DNA sequences&quot;, J Comput Biol 2000; 7(1-2):203-14.</BlastOutput_reference>
+  <BlastOutput_db>/mnt/galaxy/galaxy-central/database/files/000/dataset_857_files/blastdb</BlastOutput_db>
+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
+  <BlastOutput_query-def>chunk_of_plant</BlastOutput_query-def>
+  <BlastOutput_query-len>630</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_expect>0.001</Parameters_expect>
+      <Parameters_sc-match>1</Parameters_sc-match>
+      <Parameters_sc-mismatch>-2</Parameters_sc-mismatch>
+      <Parameters_gap-open>0</Parameters_gap-open>
+      <Parameters_gap-extend>0</Parameters_gap-extend>
+      <Parameters_filter>L;m;</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+<BlastOutput_iterations>
+<Iteration>
+  <Iteration_iter-num>1</Iteration_iter-num>
+  <Iteration_query-ID>Query_1</Iteration_query-ID>
+  <Iteration_query-def>chunk_of_plant</Iteration_query-def>
+  <Iteration_query-len>630</Iteration_query-len>
+<Iteration_hits>
+<Hit>
+  <Hit_num>1</Hit_num>
+  <Hit_id>gnl|BL_ORD_ID|2</Hit_id>
+  <Hit_def>chrIII gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence</Hit_def>
+  <Hit_accession>2</Hit_accession>
+  <Hit_len>23459830</Hit_len>
+  <Hit_hsps>
+    <Hsp>
+      <Hsp_num>1</Hsp_num>
+      <Hsp_bit-score>1164.51</Hsp_bit-score>
+      <Hsp_score>630</Hsp_score>
+      <Hsp_evalue>0</Hsp_evalue>
+      <Hsp_query-from>1</Hsp_query-from>
+      <Hsp_query-to>630</Hsp_query-to>
+      <Hsp_hit-from>4341</Hsp_hit-from>
+      <Hsp_hit-to>4970</Hsp_hit-to>
+      <Hsp_query-frame>1</Hsp_query-frame>
+      <Hsp_hit-frame>1</Hsp_hit-frame>
+      <Hsp_identity>630</Hsp_identity>
+      <Hsp_positive>630</Hsp_positive>
+      <Hsp_gaps>0</Hsp_gaps>
+      <Hsp_align-len>630</Hsp_align-len>
+      <Hsp_qseq>GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT</Hsp_qseq>
+      <Hsp_hseq>GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT</Hsp_hseq>
+      <Hsp_midline>||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||</Hsp_midline>
+    </Hsp>
+  </Hit_hsps>
+</Hit>
+</Iteration_hits>
+  <Iteration_stat>
+    <Statistics>
+      <Statistics_db-num>5</Statistics_db-num>
+      <Statistics_db-len>119146348</Statistics_db-len>
+      <Statistics_hsp-len>26</Statistics_hsp-len>
+      <Statistics_eff-space>71964315672</Statistics_eff-space>
+      <Statistics_kappa>0.46</Statistics_kappa>
+      <Statistics_lambda>1.28</Statistics_lambda>
+      <Statistics_entropy>0.85</Statistics_entropy>
+    </Statistics>
+  </Iteration_stat>
+</Iteration>
+</BlastOutput_iterations>
+</BlastOutput>
+
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastn_rhodopsin_vs_three_human.tabular
--- a/test-data/blastn_rhodopsin_vs_three_human.tabular Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/blastn_rhodopsin_vs_three_human.tabular Thu Dec 05 06:55:59 2013 -0500
b
@@ -1,7 +1,7 @@
-gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.10 1050 77 6 1 1047 88 1134 0.0 1474
+gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.07 1047 83 0 1 1047 88 1134 0.0 1474
 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.59 333 28 0 1 333 118 450 9e-133  460
 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.36 243 19 2 3127 3368 782 1023 7e-94  331
 gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.22 173 10 0 1410 1582 448 620 8e-74  265
-gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.98 171 10 2 2854 3023 615 784 8e-69  248
-gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.58 962 75 6 1 959 118 1076 0.0 1323
-gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.55 1052 121 10 1 1047 88 1134 0.0 1208
+gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.94 170 12 0 2854 3023 615 784 8e-69  248
+gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.55 959 81 0 1 959 118 1076 0.0 1323
+gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.50 1048 129 2 1 1047 88 1134 0.0 1208
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastp_four_human_vs_rhodopsin.xml
--- a/test-data/blastp_four_human_vs_rhodopsin.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/blastp_four_human_vs_rhodopsin.xml Thu Dec 05 06:55:59 2013 -0500
[
b'@@ -2,7 +2,7 @@\n <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n <BlastOutput>\n   <BlastOutput_program>blastp</BlastOutput_program>\n-  <BlastOutput_version>BLASTP 2.2.26+</BlastOutput_version>\n+  <BlastOutput_version>BLASTP 2.2.28+</BlastOutput_version>\n   <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n   <BlastOutput_db></BlastOutput_db>\n   <BlastOutput_query-ID>sp|Q9BS26|ERP44_HUMAN</BlastOutput_query-ID>\n@@ -17,630 +17,649 @@\n       <Parameters_filter>F</Parameters_filter>\n     </Parameters>\n   </BlastOutput_param>\n-  <BlastOutput_iterations>\n-    <Iteration>\n-      <Iteration_iter-num>1</Iteration_iter-num>\n-      <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n-      <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n-      <Iteration_query-len>406</Iteration_query-len>\n-      <Iteration_hits></Iteration_hits>\n-      <Iteration_stat>\n-        <Statistics>\n-          <Statistics_db-num>0</Statistics_db-num>\n-          <Statistics_db-len>0</Statistics_db-len>\n-          <Statistics_hsp-len>30</Statistics_hsp-len>\n-          <Statistics_eff-space>119568</Statistics_eff-space>\n-          <Statistics_kappa>0.041</Statistics_kappa>\n-          <Statistics_lambda>0.267</Statistics_lambda>\n-          <Statistics_entropy>0.14</Statistics_entropy>\n-        </Statistics>\n-      </Iteration_stat>\n-      <Iteration_message>No hits found</Iteration_message>\n-    </Iteration>\n-    <Iteration>\n-      <Iteration_iter-num>2</Iteration_iter-num>\n-      <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n-      
<Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n-      <Iteration_query-len>406</Iteration_query-len>\n-      <Iteration_hits></Iteration_hits>\n-      <Iteration_stat>\n-        <Statistics>\n-          <Statistics_db-num>0</Statistics_db-num>\n-          <Statistics_db-len>0</Statistics_db-len>\n-          <Statistics_hsp-len>30</Statistics_hsp-len>\n-          <Statistics_eff-space>119568</Statistics_eff-space>\n-          <Statistics_kappa>0.041</Statistics_kappa>\n-          <Statistics_lambda>0.267</Statistics_lambda>\n-          <Statistics_entropy>0.14</Statistics_entropy>\n-        </Statistics>\n-      </Iteration_stat>\n-      <Iteration_message>No hits found</Iteration_message>\n-    </Iteration>\n-    <Iteration>\n-      <Iteration_iter-num>3</Iteration_iter-num>\n-      <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n-      <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n-      <Iteration_query-len>406</Iteration_query-len>\n-      <Iteration_hits></Iteration_hits>\n-      <Iteration_stat>\n-        <Statistics>\n-          <Statistics_db-num>0</Statistics_db-num>\n-          <Statistics_db-len>0</Statistics_db-len>\n-          <Statistics_hsp-len>30</Statistics_hsp-len>\n-          <Statistics_eff-space>119568</Statistics_eff-space>\n-          <Statistics_kappa>0.041</Statistics_kappa>\n-          <Statistics_lambda>0.267</Statistics_lambda>\n-          <Statistics_entropy>0.14</Statistics_entropy>\n-        </Statistics>\n-      </Iteration_stat>\n-      <Iteration_message>No hits found</Iteration_message>\n-    </Iteration>\n-    <Iteration>\n-      <Iteration_iter-num>4</Iteration_iter-num>\n-      <Iteration_query-ID>sp|Q9BS26|ERP44_HUMAN</Iteration_query-ID>\n-      <Iteration_query-def>Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 
SV=1</Iteration_query-def>\n-      <Iteration_query-len>406</Iteration_query-len>\n-      <Iteration_hits></Iteratio'..b'q>\n+      <Hsp_hseq>MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq>\n+      <Hsp_midline>MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGID YT   E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline>\n+    </Hsp>\n+  </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>29</Statistics_hsp-len>\n+      <Statistics_eff-space>101761</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+      <Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>24</Iteration_iter-num>\n+  <Iteration_query-ID>sp|P08100|OPSD_HUMAN</Iteration_query-ID>\n+  <Iteration_query-def>Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def>\n+  <Iteration_query-len>348</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+  <Hit_num>1</Hit_num>\n+  <Hit_id>gi|12583665|dbj|BAB21486.1|</Hit_id>\n+  <Hit_def>fresh water form rod opsin [Conger myriaster]</Hit_def>\n+  <Hit_accession>BAB21486</Hit_accession>\n+  <Hit_len>354</Hit_len>\n+  <Hit_hsps>\n+    <Hsp>\n+      <Hsp_num>1</Hsp_num>\n+      
<Hsp_bit-score>599.356</Hsp_bit-score>\n+      <Hsp_score>1544</Hsp_score>\n+      <Hsp_evalue>0</Hsp_evalue>\n+      <Hsp_query-from>1</Hsp_query-from>\n+      <Hsp_query-to>341</Hsp_query-to>\n+      <Hsp_hit-from>1</Hsp_hit-from>\n+      <Hsp_hit-to>342</Hsp_hit-to>\n+      <Hsp_query-frame>0</Hsp_query-frame>\n+      <Hsp_hit-frame>0</Hsp_hit-frame>\n+      <Hsp_identity>281</Hsp_identity>\n+      <Hsp_positive>314</Hsp_positive>\n+      <Hsp_gaps>1</Hsp_gaps>\n+      <Hsp_align-len>342</Hsp_align-len>\n+      <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE</Hsp_qseq>\n+      <Hsp_hseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE</Hsp_hseq>\n+      <Hsp_midline>MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV  TW MALACA PPL GWSRYIPEGLQCSCGIDYYT  P +NNESFVIYMF  HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP   +D ASAT SKTE</Hsp_midline>\n+    </Hsp>\n+  </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>29</Statistics_hsp-len>\n+      <Statistics_eff-space>101761</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+     
 <Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n'
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular
--- a/test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular Thu Dec 05 06:55:59 2013 -0500
[
b'@@ -1,6 +1,6 @@\n-sp|P08100|OPSD_HUMAN\tgi|57163783|ref|NP_001009242.1|\t96.55\t348\t12\t0\t1\t348\t1\t348\t0.0\t701\tgi|57163783|ref|NP_001009242.1|\t1808\t336\t343\t0\t98.56\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\t348\t348\n-sp|P08100|OPSD_HUMAN\tgi|3024260|sp|P56514.1|OPSD_BUFBU\t84.80\t342\t51\t1\t1\t341\t1\t342\t0.0\t619\tgi|3024260|sp|P56514.1|OPSD_BUFBU\t1595\t290\t322\t1\t94.15\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE\t348\t354\n-sp|P08100|OPSD_HUMAN\tgi|283855846|gb|ADB45242.1|\t94.82\t328\t17\t0\t11\t338\t1\t328\t0.0\t653\tgi|283855846|gb|ADB45242.1|\t1684\t311\t321\t0\t97.87\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVL
GGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\n-sp|P08100|OPSD_HUMAN\tgi|283855823|gb|ADB45229.1|\t94.82\t328\t17\t0\t11\t338\t1\t328\t0.0\t631\tgi|283855823|gb|ADB45229.1|\t1627\t311\t323\t0\t98.48\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\n-sp|P08100|OPSD_HUMAN\tgi|223523|prf||0811197A\t93.10\t348\t23\t1\t1\t348\t1\t347\t0.0\t673\tgi|223523|prf||0811197A\t1736\t324\t336\t1\t96.55\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNL
AVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEET'..b'LVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE\t348\t354\tRecName: Full=Rhodopsin\n+sp|P08100|OPSD_HUMAN\tgi|283855846|gb|ADB45242.1|\t94.82\t328\t17\t0\t11\t338\t1\t328\t0.0\t653\tgi|283855846|gb|ADB45242.1|\t1684\t311\t321\t0\t97.87\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\trhodopsin [Cynopterus 
brachyotis]\n+sp|P08100|OPSD_HUMAN\tgi|283855823|gb|ADB45229.1|\t94.82\t328\t17\t0\t11\t338\t1\t328\t0.0\t631\tgi|283855823|gb|ADB45229.1|\t1627\t311\t323\t0\t98.48\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\trhodopsin [Myotis pilosus]\n+sp|P08100|OPSD_HUMAN\tgi|223523|prf||0811197A\t93.10\t348\t23\t1\t1\t348\t1\t347\t0.0\t673\tgi|223523|prf||0811197A\t1736\t324\t336\t1\t96.55\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\t348\t347\trhodopsin [Bos 
taurus]\n+sp|P08100|OPSD_HUMAN\tgi|12583665|dbj|BAB21486.1|\t82.16\t342\t60\t1\t1\t341\t1\t342\t0.0\t599\tgi|12583665|dbj|BAB21486.1|\t1544\t281\t314\t1\t91.81\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE\t348\t354\tfresh water form rod opsin [Conger myriaster]\n'
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastp_four_human_vs_rhodopsin_ext.tabular
--- a/test-data/blastp_four_human_vs_rhodopsin_ext.tabular Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/blastp_four_human_vs_rhodopsin_ext.tabular Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -1,6 +1,6 @@\n-sp|P08100|OPSD_HUMAN\tgi|57163783|ref|NP_001009242.1|\t96.55\t348\t12\t0\t1\t348\t1\t348\t0.0\t 701\tgi|57163783|ref|NP_001009242.1|\t1808\t336\t343\t0\t98.56\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\t348\t348\n-sp|P08100|OPSD_HUMAN\tgi|3024260|sp|P56514.1|OPSD_BUFBU\t84.80\t342\t51\t1\t1\t341\t1\t342\t0.0\t 619\tgi|3024260|sp|P56514.1|OPSD_BUFBU\t1595\t290\t322\t1\t94.15\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE\t348\t354\n-sp|P08100|OPSD_HUMAN\tgi|283855846|gb|ADB45242.1|\t94.82\t328\t17\t0\t11\t338\t1\t328\t0.0\t 
653\tgi|283855846|gb|ADB45242.1|\t1684\t311\t321\t0\t97.87\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\n-sp|P08100|OPSD_HUMAN\tgi|283855823|gb|ADB45229.1|\t94.82\t328\t17\t0\t11\t338\t1\t328\t0.0\t 631\tgi|283855823|gb|ADB45229.1|\t1627\t311\t323\t0\t98.48\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\n-sp|P08100|OPSD_HUMAN\tgi|223523|prf||0811197A\t93.10\t348\t23\t1\t1\t348\t1\t347\t0.0\t 
673\tgi|223523|prf||0811197A\t1736\t324\t336\t1\t96.55\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YT'..b'\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE\t348\t354\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|283855846|gb|ADB45242.1|\t94.82\t328\t17\t0\t11\t338\t1\t328\t0.0\t 
653\tgi|283855846|gb|ADB45242.1|\t1684\t311\t321\t0\t97.87\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|283855823|gb|ADB45229.1|\t94.82\t328\t17\t0\t11\t338\t1\t328\t0.0\t 631\tgi|283855823|gb|ADB45229.1|\t1627\t311\t323\t0\t98.48\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|223523|prf||0811197A\t93.10\t348\t23\t1\t1\t348\t1\t347\t0.0\t 
673\tgi|223523|prf||0811197A\t1736\t324\t336\t1\t96.55\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\t348\t347\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|12583665|dbj|BAB21486.1|\t82.16\t342\t60\t1\t1\t341\t1\t342\t0.0\t 599\tgi|12583665|dbj|BAB21486.1|\t1544\t281\t314\t1\t91.81\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE\t348\t354\tN/A\n'
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastp_human_vs_pdb_seg_no_converted_ext.tabular
--- a/test-data/blastp_human_vs_pdb_seg_no_converted_ext.tabular Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/blastp_human_vs_pdb_seg_no_converted_ext.tabular Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -1,8 +1,8 @@\n-sp|Q9BS26|ERP44_HUMAN\tgi|193885198|pdb|2R2J|A\t97.11\t381\t11\t0\t26\t406\t2\t382\t0.0\t768\tgi|193885198|pdb|2R2J|A\t1982\t370\t372\t0\t97.64\t1\t1\tPVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL\tPLGSEITSLDTENIDEILNNADVALVNFYADWCRFSQXLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGXXXKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDXVYLGAXTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHXKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPVIAIDSFRHXYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL\t406\t382\n-sp|Q9BS26|ERP44_HUMAN\tgi|88192228|pdb|2B5E|A\t25.17\t290\t193\t8\t25\t306\t10\t283\t4e-20\t95.1\tgi|88192228|pdb|2B5E|A;gi|206581884|pdb|3BOA|A\t235\t73\t133\t24\t45.86\t1\t1\tTPVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKR-EYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNI---IYKPPGHSAPDMVYLGA---MTNFDVTYNWIQDKCVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKF-RH\tAPEDSAVVKLATDSFNEYIQSHDLVLAEFFAPWCGHCKNMAPEYVKAAETLVEK-----NITLAQIDCTENQDLCMEHNIPGFPSLKIFKNSDVNNSIDYEGPRTAEAIVQFMIKQSQPAVAVVADLPAYLANETFVTPVIVQSGKIDADFNATFYSMANKHFNDYDFVSA--------ENADDDFKLSIYLPSAMDEP-VVYNGKKADIADADVFEKWLQVEALPYFGEIDGSVFAQYVESGLPLGYLFY--NDEEELEEYKPLFTELAKKNRGLMNFVSIDARKFGRH\t406\t504\n-sp|Q9NSY1|BMP2K_HUMAN\tgi|73536291|pdb|2BUJ|A\t29.39\t279\t182\t8\t40\t308\t21\t294\t1e-22\t105\tgi|73536291|pdb|2BUJ|A;gi|73536292|pdb|2BUJ|B\t262\t82\t130\t15\t46.59\t1\t1\tGVRVFAVGRHQVTLEESLAEGGFSTVFLVR-THGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMN
KKLQTG--FTEPEVLQIFCDTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDG-VNVVEEEIKKYTTLSYRAPEMINLYGGKPITTKADIWALGCLLYKLCFFTLPF------GESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDI\tGHMVIIDNKHYLFIQK-LGEGGFSYVDLVEGLHDGHFYALKRILCHEQQDREEAQREAD-MHRLFNHPNILRLVAYCLRERGAKH-EAWLLLPFFKRGTLWNEIERLKDKGNFLTEDQILWLLLGICRGLEAIH--AKGYAHRDLKPTNILLGDEGQPVLMDLGSMNQACIHVEGSRQALTLQDWAAQRCTISYRAPELFSVQSHCVIDERTDVWSLGCVLYAMMFGEGPYDMVFQKGDSVALAVQNQLSIPQSPRHSSALWQLLNSMMTVDPHQRPHI\t1161\t317\n-sp|Q9NSY1|BMP2K_HUMAN\tgi|270346335|pdb|2WQM|A\t27.21\t272\t166\t12\t53\t311\t36\t288\t6e-17\t86.3\tgi|270346335|pdb|2WQM|A;gi|270346336|pdb|2WQN|A\t212\t74\t129\t32\t47.43\t1\t1\tLEESLAEGGFSTVFLVRTH-GGIRCALKRMYVNNMPDLNV---CKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMN--KKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGGKPITTKADIWALGCLLYKLCFFTLPFGESQV---AICD----GNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDIFQV\tIEKKIGRGQFSEVYRAACLLDGVPVALKKVQIFDLMDAKARADCIKEIDLLKQLN-HPNVIKYY---ASFIEDN--ELNIVLELADAGDLSRMIKHFKKQKRLIPERTVWKYFVQLCSALEHMHSRR--VMHRDIKPANVFITATGVVKLGDLG--LGRFFSSKTTAAHSL------VGTPYYMSPERIHENG---YNFKSDIWSLGCLLYEMAALQSPFYGDKMNLYSLCKKIEQCDYPPLPSDHYSEELRQLVNMCINPDPEKRPDVTYV\t1161\t310\n-sp|P06213|INSR_HUMAN\tgi|116667097|pdb|2DTG|E\t95.91\t928\t7\t2\t28\t955\t1\t897\t0.0\t1846\tgi|116667097|pdb|2DTG|E\t4781\t890\t893\t31\t96.23\t1\t1\tHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECLGNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQERNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWERQAEDSE
LFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQILKELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAFPNTSS'..b'83860|pdb|2I35|A;gi|157883861|pdb|2I36|A;gi|157883862|pdb|2I36|B;gi|157883863|pdb|2I36|C;gi|157883864|pdb|2I37|A;gi|157883865|pdb|2I37|B;gi|157883866|pdb|2I37|C;gi|159795066|pdb|2PED|A;gi|159795067|pdb|2PED|B;gi|192988480|pdb|3CAP|A;gi|192988481|pdb|3CAP|B;gi|195927457|pdb|3C9L|A;gi|197107530|pdb|1F88|A;gi|197107531|pdb|1F88|B;gi|206582030|pdb|3DQB|A\t1756\t325\t337\t0\t96.84\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\t348\t348\tChain A, Structure Of Bovine Rhodopsin (Dark Adapted)<>Chain A, Structure Of Bovine Rhodopsin (Metarhodopsin Ii)<>Chain A, Structure Of Bovine Rhodopsin In A Trigonal Crystal Form<>Chain B, Structure Of Bovine Rhodopsin In A Trigonal Crystal Form<>Chain A, Crystal Structure Of Bovine Rhodopsin<>Chain B, Crystal Structure Of Bovine Rhodopsin<>Chain A, Crystal Structure Of Bovine Rhodopsin At 2.6 Angstroms Resolution<>Chain B, Crystal Structure Of Bovine Rhodopsin At 2.6 Angstroms Resolution<>Chain A, Crystal Structure Of Bovine Rhodopsin At 2.2 Angstroms Resolution<>Chain B, Crystal Structure Of Bovine Rhodopsin At 2.2 Angstroms Resolution<>Chain A, Crystallographic Model Of Bathorhodopsin<>Chain B, Crystallographic Model Of Bathorhodopsin<>Chain A, Crystallographic 
Model Of Lumirhodopsin<>Chain B, Crystallographic Model Of Lumirhodopsin<>Chain A, Crystal Structure Of Rhombohedral Crystal Form Of Ground- State Rhodopsin<>Chain A, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain B, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain C, Crystal Structure Of Trigonal Crystal Form Of Ground-State Rhodopsin<>Chain A, Crystal Structure Of A Photoactivated Rhodopsin<>Chain B, Crystal Structure Of A Photoactivated Rhodopsin<>Chain C, Crystal Structure Of A Photoactivated Rhodopsin<>Chain A, Crystallographic Model Of 9-Cis-Rhodopsin<>Chain B, Crystallographic Model Of 9-Cis-Rhodopsin<>Chain A, Crystal Structure Of Native Opsin: The G Protein-Coupled Receptor Rhodopsin In Its Ligand-Free State<>Chain B, Crystal Structure Of Native Opsin: The G Protein-Coupled Receptor Rhodopsin In Its Ligand-Free State<>Chain A, Structure Of Ground-State Bovine Rhodospin In A Hexagonal Crystal Form<>Chain A, Crystal Structure Of Bovine Rhodopsin<>Chain B, Crystal Structure Of Bovine Rhodopsin<>Chain A, Crystal Structure Of The Active G-Protein-Coupled Receptor Opsin In Complex With A C-Terminal Peptide Derived From The Galpha Subunit Of 
Transducin\n+sp|P08100|OPSD_HUMAN\tgi|195927458|pdb|3C9M|A\t93.10\t348\t24\t0\t1\t348\t1\t348\t0.0\t674\tgi|195927458|pdb|3C9M|A\t1738\t324\t335\t0\t96.26\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMCGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSCFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\t348\t348\tChain A, Structure Of A Mutant Bovine Rhodopsin In Hexagonal Crystal Form\n'
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastx_rhodopsin_vs_four_human.tabular
--- a/test-data/blastx_rhodopsin_vs_four_human.tabular Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/blastx_rhodopsin_vs_four_human.tabular Thu Dec 05 06:55:59 2013 -0500
b
@@ -1,10 +1,10 @@
-gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0  662
-gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0  575
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 2e-68  224
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 5e-36  129
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.43 56 2 0 2855 3022 177 232 3e-33  120
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 2e-32  118
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.00 25 2 0 4222 4296 312 336 2e-12 56.2
-gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0  613
-gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0  641
-gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.93 332 60 0 23 1018 1 332 0.0  559
+gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0  639
+gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0  551
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67  220
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35  127
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33  121
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0
+gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0  589
+gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0  619
+gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0  532
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastx_rhodopsin_vs_four_human.xml
--- a/test-data/blastx_rhodopsin_vs_four_human.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/blastx_rhodopsin_vs_four_human.xml Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -2,7 +2,7 @@\n <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n <BlastOutput>\n   <BlastOutput_program>blastx</BlastOutput_program>\n-  <BlastOutput_version>BLASTX 2.2.26+</BlastOutput_version>\n+  <BlastOutput_version>BLASTX 2.2.28+</BlastOutput_version>\n   <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n   <BlastOutput_db></BlastOutput_db>\n   <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n@@ -17,706 +17,725 @@\n       <Parameters_filter>L;</Parameters_filter>\n     </Parameters>\n   </BlastOutput_param>\n-  <BlastOutput_iterations>\n-    <Iteration>\n-      <Iteration_iter-num>1</Iteration_iter-num>\n-      <Iteration_query-ID>Query_1</Iteration_query-ID>\n-      <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n-      <Iteration_query-len>1047</Iteration_query-len>\n-      <Iteration_hits></Iteration_hits>\n-      <Iteration_stat>\n-        <Statistics>\n-          <Statistics_db-num>0</Statistics_db-num>\n-          <Statistics_db-len>0</Statistics_db-len>\n-          <Statistics_hsp-len>29</Statistics_hsp-len>\n-          <Statistics_eff-space>102080</Statistics_eff-space>\n-          <Statistics_kappa>0.041</Statistics_kappa>\n-          <Statistics_lambda>0.267</Statistics_lambda>\n-          <Statistics_entropy>0.14</Statistics_entropy>\n-        </Statistics>\n-      </Iteration_stat>\n-      <Iteration_message>No hits found</Iteration_message>\n-    </Iteration>\n-    <Iteration>\n-      <Iteration_iter-num>2</Iteration_iter-num>\n-      <Iteration_query-ID>Query_1</Iteration_query-ID>\n-      <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis 
catus rhodopsin (RHO), mRNA</Iteration_query-def>\n-      <Iteration_query-len>1047</Iteration_query-len>\n-      <Iteration_hits></Iteration_hits>\n-      <Iteration_stat>\n-        <Statistics>\n-          <Statistics_db-num>0</Statistics_db-num>\n-          <Statistics_db-len>0</Statistics_db-len>\n-          <Statistics_hsp-len>29</Statistics_hsp-len>\n-          <Statistics_eff-space>102080</Statistics_eff-space>\n-          <Statistics_kappa>0.041</Statistics_kappa>\n-          <Statistics_lambda>0.267</Statistics_lambda>\n-          <Statistics_entropy>0.14</Statistics_entropy>\n-        </Statistics>\n-      </Iteration_stat>\n-      <Iteration_message>No hits found</Iteration_message>\n-    </Iteration>\n-    <Iteration>\n-      <Iteration_iter-num>3</Iteration_iter-num>\n-      <Iteration_query-ID>Query_1</Iteration_query-ID>\n-      <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n-      <Iteration_query-len>1047</Iteration_query-len>\n-      <Iteration_hits></Iteration_hits>\n-      <Iteration_stat>\n-        <Statistics>\n-          <Statistics_db-num>0</Statistics_db-num>\n-          <Statistics_db-len>0</Statistics_db-len>\n-          <Statistics_hsp-len>29</Statistics_hsp-len>\n-          <Statistics_eff-space>102080</Statistics_eff-space>\n-          <Statistics_kappa>0.041</Statistics_kappa>\n-          <Statistics_lambda>0.267</Statistics_lambda>\n-          <Statistics_entropy>0.14</Statistics_entropy>\n-        </Statistics>\n-      </Iteration_stat>\n-      <Iteration_message>No hits found</Iteration_message>\n-    </Iteration>\n-    <Iteration>\n-      <Iteration_iter-num>4</Iteration_iter-num>\n-      <Iteration_query-ID>Query_1</Iteration_query-ID>\n-      <Iteration_query-def>gi|57163782|ref|NM_001009242.1| Felis catus rhodopsin (RHO), mRNA</Iteration_query-def>\n-      <Iteration_query-len>1047</Iteration_query-len>\n-      <Iteration_hits>\n-        <Hit>\n-          
<Hit_num>1</Hit_num>\n-          <Hit_id>Subject_4</Hit_id>\n-          <Hit_def>sp|P08100|OPS'..b'Statistics_db-len>\n+      <Statistics_hsp-len>32</Statistics_hsp-len>\n+      <Statistics_eff-space>155584</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+      <Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>23</Iteration_iter-num>\n+  <Iteration_query-ID>Query_6</Iteration_query-ID>\n+  <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+  <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>32</Statistics_hsp-len>\n+      <Statistics_eff-space>155584</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+      <Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+  <Iteration_message>No hits found</Iteration_message>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>24</Iteration_iter-num>\n+  <Iteration_query-ID>Query_6</Iteration_query-ID>\n+  <Iteration_query-def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Iteration_query-def>\n+  <Iteration_query-len>1344</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+  <Hit_num>1</Hit_num>\n+  <Hit_id>Subject_4</Hit_id>\n+  <Hit_def>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Hit_def>\n+  <Hit_accession>Subject_4</Hit_accession>\n+  <Hit_len>348</Hit_len>\n+  <Hit_hsps>\n+    <Hsp>\n+      <Hsp_num>1</Hsp_num>\n+      
<Hsp_bit-score>532.717</Hsp_bit-score>\n+      <Hsp_score>1371</Hsp_score>\n+      <Hsp_evalue>0</Hsp_evalue>\n+      <Hsp_query-from>23</Hsp_query-from>\n+      <Hsp_query-to>1021</Hsp_query-to>\n+      <Hsp_hit-from>1</Hsp_hit-from>\n+      <Hsp_hit-to>333</Hsp_hit-to>\n+      <Hsp_query-frame>2</Hsp_query-frame>\n+      <Hsp_hit-frame>0</Hsp_hit-frame>\n+      <Hsp_identity>272</Hsp_identity>\n+      <Hsp_positive>307</Hsp_positive>\n+      <Hsp_gaps>0</Hsp_gaps>\n+      <Hsp_align-len>333</Hsp_align-len>\n+      <Hsp_qseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG</Hsp_qseq>\n+      <Hsp_hseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA</Hsp_hseq>\n+      <Hsp_midline>MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE+HAIMGV  TW MALACA PPL GWSRYIPEGLQCSCGIDYYT  P +NNESFVIYMF  HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA+YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR+CM+TT+CCGKNP  +++ </Hsp_midline>\n+    </Hsp>\n+  </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>32</Statistics_hsp-len>\n+      <Statistics_eff-space>155584</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+      
<Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n'
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastx_rhodopsin_vs_four_human_converted.tabular
--- a/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/blastx_rhodopsin_vs_four_human_converted.tabular Thu Dec 05 06:55:59 2013 -0500
b
@@ -1,10 +1,10 @@
-gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 662
-gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 575
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 2e-68 224
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 5e-36 129
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.43 56 2 0 2855 3022 177 232 3e-33 120
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 2e-32 118
-gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.00 25 2 0 4222 4296 312 336 2e-12 56.2
-gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 613
-gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 641
-gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.93 332 60 0 23 1018 1 332 0.0 559
+gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.55 348 12 0 1 1044 1 348 0.0 639
+gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.24 332 49 0 42 1037 1 332 0.0 551
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.40 111 4 0 1 333 11 121 4e-67 220
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.31 65 5 0 3174 3368 248 312 2e-35 127
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.53 59 5 0 2855 3031 177 235 2e-33 121
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.22 59 4 0 1404 1580 119 177 1e-25 97.1
+gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.46 26 3 0 4222 4299 312 337 1e-12 57.0
+gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.09 326 16 0 1 978 11 336 0.0 589
+gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.39 348 23 0 1 1044 1 348 0.0 619
+gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.68 333 61 0 23 1021 1 333 0.0 532
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular
--- a/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/blastx_rhodopsin_vs_four_human_converted_ext.tabular Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -1,10 +1,10 @@\n-gi|57163782|ref|NM_001009242.1|\tsp|P08100|OPSD_HUMAN\t96.55\t348\t12\t0\t1\t1044\t1\t348\t0.0\t662\tsp|P08100|OPSD_HUMAN\t1707\t336\t343\t0\t98.56\t1\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\t1047\t348\n-gi|2734705|gb|U59921.1|BBU59921\tsp|P08100|OPSD_HUMAN\t85.24\t332\t49\t0\t42\t1037\t1\t332\t0.0\t575\tsp|P08100|OPSD_HUMAN\t1481\t283\t315\t0\t94.88\t3\t0\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE\t1574\t348\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t96.40\t111\t4\t0\t1\t333\t11\t121\t2e-68\t224\tsp|P08100|OPSD_HUMAN\t570\t107\t109\t0\t98.20\t1\t0\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG\tVPFSNATGVVR
SPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG\t4301\t348\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t92.31\t65\t5\t0\t3174\t3368\t248\t312\t5e-36\t129\tsp|P08100|OPSD_HUMAN\t324\t60\t64\t0\t98.46\t3\t0\tKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ\tKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ\t4301\t348\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t96.43\t56\t2\t0\t2855\t3022\t177\t232\t3e-33\t120\tsp|P08100|OPSD_HUMAN\t302\t54\t56\t0\t100.00\t2\t0\tRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKE\tRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKE\t4301\t348\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t93.22\t59\t4\t0\t1404\t1580\t119\t177\t2e-32\t118\tsp|P08100|OPSD_HUMAN\t295\t55\t56\t0\t94.92\t3\t0\tLAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR\tLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR\t4301\t348\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t92.00\t25\t2\t0\t4222\t4296\t312\t336\t2e-12\t56.2\tsp|P08100|OPSD_HUMAN\t134\t23\t24\t0\t96.00\t1\t0\tQFRNCMLTTLCCGKNPLGDDEASTT\tQFRNCMLTTICCGKNPLGDDEASAT\t4301\t348\n-gi|283855822|gb|GQ290312.1|\tsp|P08100|OPSD_HUMAN\t95.09\t326\t16\t0\t1\t978\t11\t336\t0.0\t613\tsp|P08100|OPSD_HUMAN\t1582\t310\t322\t0\t98.77\t1\t0\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLG
DDEASAT\t983\t348\n-gi|18148870|dbj|AB062417.1|\tsp|P08100|OPSD_HUMAN\t93.39\t348\t23\t0\t1\t1044\t1\t348\t0.0\t641\tsp|P08100|OPSD_HUMAN\t1654\t325\t337\t0\t96.84\t1\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAY'..b'AEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG\t4301\t348\tRhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1\n+gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t92.31\t65\t5\t0\t3174\t3368\t248\t312\t2e-35\t127\tsp|P08100|OPSD_HUMAN\t319\t60\t64\t0\t98.46\t3\t0\tKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ\tKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ\t4301\t348\tRhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1\n+gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t91.53\t59\t5\t0\t2855\t3031\t177\t235\t2e-33\t121\tsp|P08100|OPSD_HUMAN\t303\t54\t57\t0\t96.61\t2\t0\tRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS\tRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA\t4301\t348\tRhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1\n+gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t93.22\t59\t4\t0\t1404\t1580\t119\t177\t1e-25\t97.1\tsp|P08100|OPSD_HUMAN\t240\t55\t56\t0\t94.92\t3\t0\tLAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR\tLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR\t4301\t348\tRhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1\n+gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t88.46\t26\t3\t0\t4222\t4299\t312\t337\t1e-12\t57.0\tsp|P08100|OPSD_HUMAN\t136\t23\t24\t0\t92.31\t1\t0\tQFRNCMLTTLCCGKNPLGDDEASTTA\tQFRNCMLTTICCGKNPLGDDEASATV\t4301\t348\tRhodopsin OS=Homo sapiens GN=RHO PE=1 
SV=1\n+gi|283855822|gb|GQ290312.1|\tsp|P08100|OPSD_HUMAN\t95.09\t326\t16\t0\t1\t978\t11\t336\t0.0\t589\tsp|P08100|OPSD_HUMAN\t1518\t310\t322\t0\t98.77\t1\t0\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT\t983\t348\tRhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1\n+gi|18148870|dbj|AB062417.1|\tsp|P08100|OPSD_HUMAN\t93.39\t348\t23\t0\t1\t1044\t1\t348\t0.0\t619\tsp|P08100|OPSD_HUMAN\t1596\t325\t337\t0\t96.84\t1\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVXXXXXXXXXXXXXXXXXKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\t1047\t348\tRhodopsin OS=Homo sapiens GN=RHO PE=1 
SV=1\n+gi|12583664|dbj|AB043817.1|\tsp|P08100|OPSD_HUMAN\t81.68\t333\t61\t0\t23\t1021\t1\t333\t0.0\t532\tsp|P08100|OPSD_HUMAN\t1371\t272\t307\t0\t92.19\t2\t0\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKXXXXXXXXXXXXXXXXXXVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA\t1344\t348\tRhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1\n'
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/blastx_rhodopsin_vs_four_human_ext.tabular
--- a/test-data/blastx_rhodopsin_vs_four_human_ext.tabular Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/blastx_rhodopsin_vs_four_human_ext.tabular Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -1,10 +1,10 @@\n-gi|57163782|ref|NM_001009242.1|\tsp|P08100|OPSD_HUMAN\t96.55\t348\t12\t0\t1\t1044\t1\t348\t0.0\t 662\tsp|P08100|OPSD_HUMAN\t1707\t336\t343\t0\t98.56\t1\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\t1047\t348\n-gi|2734705|gb|U59921.1|BBU59921\tsp|P08100|OPSD_HUMAN\t85.24\t332\t49\t0\t42\t1037\t1\t332\t0.0\t 575\tsp|P08100|OPSD_HUMAN\t1481\t283\t315\t0\t94.88\t3\t0\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDD\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE\t1574\t348\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t96.40\t111\t4\t0\t1\t333\t11\t121\t2e-68\t 
224\tsp|P08100|OPSD_HUMAN\t570\t107\t109\t0\t98.20\t1\t0\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG\t4301\t348\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t92.31\t65\t5\t0\t3174\t3368\t248\t312\t5e-36\t 129\tsp|P08100|OPSD_HUMAN\t324\t60\t64\t0\t98.46\t3\t0\tKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ\tKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ\t4301\t348\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t96.43\t56\t2\t0\t2855\t3022\t177\t232\t3e-33\t 120\tsp|P08100|OPSD_HUMAN\t302\t54\t56\t0\t100.00\t2\t0\tRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKE\tRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKE\t4301\t348\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t93.22\t59\t4\t0\t1404\t1580\t119\t177\t2e-32\t 118\tsp|P08100|OPSD_HUMAN\t295\t55\t56\t0\t94.92\t3\t0\tLAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR\tLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR\t4301\t348\n-gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t92.00\t25\t2\t0\t4222\t4296\t312\t336\t2e-12\t56.2\tsp|P08100|OPSD_HUMAN\t134\t23\t24\t0\t96.00\t1\t0\tQFRNCMLTTLCCGKNPLGDDEASTT\tQFRNCMLTTICCGKNPLGDDEASAT\t4301\t348\n-gi|283855822|gb|GQ290312.1|\tsp|P08100|OPSD_HUMAN\t95.09\t326\t16\t0\t1\t978\t11\t336\t0.0\t 
613\tsp|P08100|OPSD_HUMAN\t1582\t310\t322\t0\t98.77\t1\t0\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT\t983\t348\n-gi|18148870|dbj|AB062417.1|\tsp|P08100|OPSD_HUMAN\t93.39\t348\t23\t0\t1\t1044\t1\t348\t0.0\t 641\tsp|P08100|OPSD_HUMAN\t1654\t325\t337\t0\t96.84\t1\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPW'..b'LKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDE\t1574\t348\tN/A\n+gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t96.40\t111\t4\t0\t1\t333\t11\t121\t4e-67\t 220\tsp|P08100|OPSD_HUMAN\t560\t107\t109\t0\t98.20\t1\t0\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG\t4301\t348\tN/A\n+gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t92.31\t65\t5\t0\t3174\t3368\t248\t312\t2e-35\t 127\tsp|P08100|OPSD_HUMAN\t319\t60\t64\t0\t98.46\t3\t0\tKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ\tKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ\t4301\t348\tN/A\n+gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t91.53\t59\t5\t0\t2855\t3031\t177\t235\t2e-33\t 
121\tsp|P08100|OPSD_HUMAN\t303\t54\t57\t0\t96.61\t2\t0\tRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS\tRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA\t4301\t348\tN/A\n+gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t93.22\t59\t4\t0\t1404\t1580\t119\t177\t1e-25\t97.1\tsp|P08100|OPSD_HUMAN\t240\t55\t56\t0\t94.92\t3\t0\tLAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR\tLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR\t4301\t348\tN/A\n+gi|283855845|gb|GQ290303.1|\tsp|P08100|OPSD_HUMAN\t88.46\t26\t3\t0\t4222\t4299\t312\t337\t1e-12\t57.0\tsp|P08100|OPSD_HUMAN\t136\t23\t24\t0\t92.31\t1\t0\tQFRNCMLTTLCCGKNPLGDDEASTTA\tQFRNCMLTTICCGKNPLGDDEASATV\t4301\t348\tN/A\n+gi|283855822|gb|GQ290312.1|\tsp|P08100|OPSD_HUMAN\t95.09\t326\t16\t0\t1\t978\t11\t336\t0.0\t 589\tsp|P08100|OPSD_HUMAN\t1518\t310\t322\t0\t98.77\t1\t0\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT\t983\t348\tN/A\n+gi|18148870|dbj|AB062417.1|\tsp|P08100|OPSD_HUMAN\t93.39\t348\t23\t0\t1\t1044\t1\t348\t0.0\t 
619\tsp|P08100|OPSD_HUMAN\t1596\t325\t337\t0\t96.84\t1\t0\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\t1047\t348\tN/A\n+gi|12583664|dbj|AB043817.1|\tsp|P08100|OPSD_HUMAN\t81.68\t333\t61\t0\t23\t1021\t1\t333\t0.0\t 532\tsp|P08100|OPSD_HUMAN\t1371\t272\t307\t0\t92.19\t2\t0\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDG\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA\t1344\t348\tN/A\n'
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/dustmasker_three_human.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dustmasker_three_human.fasta Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -0,0 +1,183 @@\n+>ENA|AB011145|AB011145.1 Homo sapiens mRNA for KIAA0573 protein, partial cds\n+GAGAGGACGAGGTGCCGCTGCCTGGAGAATCCTCCGCTGCCGTCGGCTCCCGGAGCCCAG\n+CCCTTTCCTAACCCAACCCAACCTAGCCCAGTCCCAGCCGCCAGCGCCTGTCCCTGTCAC\n+GGACCCCAGCGTTACCATGCATCCTGCCGTCTTCCTATCCTTACCCGACCTCAGATGCTC\n+CCTTCTGCTCCTGGTAACTTGGGTTTTTACTCCTGTAACAACTGAAATAACAAGTCTTGA\n+TACAGAGAATATAGATGAAATTTTAAACAATGCTGATGTTGCTTTAGTAAATTTTTATGC\n+TGACTGGTGTCGTTTCAGTCAGATGTTGCATCCAATTTTTGAGGAAGCTTCCGATGTCAT\n+TAAGGAAGAATTTCCAAATGAAAATCAAGTAGTGTTTGCCAGAGTTGATTGTGATCAGCA\n+CTCTGACATAGCCCAGAGATACAGGATAAGCAAATACCCAACCCTCAAATTGTTTCGTAA\n+TGGGATGATGATGAAGAGAGAATACAGGGGTCAGCGATCAGTGAAAGCATTGGCAGATTA\n+CATCAGGCAACAAAAAAGTGACCCCATTCAAGAAATTCGGGACTTAGCAGAAATCACCAC\n+TCTTGATCGCAGCAAAAGAAATATCATTGGATATTTTGAGCAAAAGGACTCGGACAACTA\n+TAGAGTTTTTGAACGAGTAGCGAATATTTTGCATGATGACTGTGCCTTTCTTTCTGCATT\n+TGGGGATGTTTCAAAACCGGAAAGATATAGTGGCGACAACATAATCTACAAACCACCAGG\n+GCATTCTGCTCCGGATATGGTGTACTTGGGAGCTATGACAAATTTTGATGTGACTTACAA\n+TTGGATTCAAGATAAATGTGTTCCTCTTGTCCGAGAAATAACATTTGAAAATGGAGAGGA\n+ATTGACAGAAGAAGGACTGCCTTTTCTCATACTCTTTCACATGAAAGAAGATACAGAAAG\n+TTTAGAAATATTCCAGAATGAAGTAGCTCGGCAATTAATAAGTGAAAAAGGTACAATAAA\n+CTTTTTACATGCCGATTGTGACAAATTTAGACATCCTCTTCTGCACATACAGAAAACTCC\n+AGCAGATTGTCCTGTAATCGCTATTGACAGCTTTAGGCATATGTATGTGTTTGGAGACTT\n+CAAAGATGTATTAATTCCTGGAAAACTCAAGCAATTCGTATTTGACTTACATTCTGGAAA\n+ACTGCACAGAGAATTCCATCATGGACCTGACCCAACTGATACAGCCCCAGGAGAGCAAGC\n+CCAAGATGTAGCAAGCAGTCCACCTGAGAGCTCCTTCCAGAAACTAGCACCCAGTGAATA\n+TAGGTATACTCTATTGAGGGATCGAGATGAGCTTTAAAAACTTGAAAAACAGTTTGTAAG\n+CCTTTCAACAGCAGCATCAACCTACGTGGTGGAAATAGTAAACCTATATTTTCATAATTC\n+TATGTGTatttttattttgaataaacagaaagaaattttgggtttttaatttttttCTCC\n+CCGACTCAAAATGCATTGTCATTTAATATAGTAGCCTCTTaaaaaaaaaaaaaCCTGCTA\n+GGATTTAAAAATAAAAATCAGAGGCCTATCTCCACTTTAAATCTGTCCTGTAAAAGTTTT\n+ATAAATCAAATGAAAGGTGACATTGCCAGAAACTTACCATTAACTTGCACTACTAGGGTA\n+GGGAGGACTTAGGATGTTTCCTGTGTCGTATGTGCTTTTCTTTCTTTCATATGATCAATT\n+CTGTTGGTATTTTCAGTATCTCATTTCTCAAAGCTAAAGAGATATACATTCTGGATACTT\n+GGGAGGGGA
ATAAATTAAAGTTTTCACACTGTGTACTGTGTTTTACTGATTGGTTGGATA\n+TTGCTTATGAAAATTCCATAGTGGTAtttttttGGATTCTTAATGTGTAACTTAAACATA\n+CTTTGAAGTGGAGGAGAGTCATAAGACAGAACATTTGGCAGGAATTGTCCTTATGAAACA\n+AGAAAAAGAAAATGAAAAGTATTATTAAGCTTCTGTGTTTGTCTAAAAATGTGGCATATG\n+GATGGCATTTAAAACTTTGAATGAATTATACCTAAATCTGGGACAGGGAGGTGACAGTGG\n+AACAGGCTACCAATCAGAACTAGATGACTTTTAAGGCTCCTCCTATTATGAGACTTCAAT\n+TTCCAAAGAGAAGAACTAGCAGAGAAATTGTATTTCAGTAATTTTAAGCTCCTTCTGTCT\n+TGTAGAGTCTTGTTATAGTTGTATAAATCAAAAACACAGAATAAGGAACATATTTAACtt\n+tttttCATTATAAAATGGTTAGAGGACCCTACCCCCTCTAGATTCCCTGATTTCCCCAGG\n+CCTGCAGCATACAGTAAGATGGGTCCCTGTGCCAGGCCTCAATACTGCCAGGGAATAAAA\n+CCAGAGGGAGAGGACCCTCAGTGTCATATCAGGAAGCCCAGTGCCAGAGGACAGACAGGT\n+TCAAAACTGGCTTTTCCTCTGGGCCTGGGTTGGTGCTATAGGCCAAGGGTCATTTTATAC\n+TTGGGTATAAATCAATCCCAGTTTGGGAAAAGATTATTTTTAAGCTTAAAAGGCTGACAT\n+GTGCCATTATATGTAGTATGTAATATATGTAACATCTTCCAATTCTTTTAAAATAAAATT\n+AATATTTATAATGGATATTTAATGATTGTTATTTTTAAAAACCAGCTTATAATTCCTCGT\n+TATGCATGATTTATCCAAAGTTTCCATAGTTTTATTCAAAATAATAAATGTTAATAAGGT\n+GATAAGGGGTATATTTAATGTATTGTATCAAATTGTGAATAAGAAAGTAGGATGGAGCTT\n+TCTAGAGGTTGGGCCTTAGTTCTGTTATCCTCATTGCTTTTAACCAATAAGTTAAATGAA\n+GTTAGAGTTATGGTCTTCAGGTTAGATTATGGACCAGATCTGTGAGGGTCAGCATGGAAA\n+TTCACATTCAACAAGGTAGCACACAGGACCAAGAGCAGCACATGCAATCAACTGGAATAA\n+TATAGTAATCCTGTAACTGGGTTTGAAAAAATAATCAACAAAAGATACAATTCAAGGGTT\n+AGGTTGCAGAGAGCTGGCTTGAGAGTAGTTATTATGAAAAAGGCCTCAAGGAGTACGTGT\n+TCAGTATGCTCTAAGATGATAAAGTGGCTGTTAAAAAGGGAGTTGATTTGAGGAAGTATT\n+ACTTAGCATTCATGCATATTGGGCTTAGGCTCTAGCCCTGCCACTATCATTGTCTTCTCT\n+GGACTGTGAAGTCACTGAGGACAAGGAAACTAAATTTAATGTCTGTATCACTAGTGCCTA\n+GAATTTCTGGACACTTAGTAGTCACCATCAGGCGTTTATTTAATGAATGAGAAGCAAAGT\n+GACCTTGGTTACTTTTTTACCCTGAGGGGCTCAGCACTCATTAGGACTTGGTGCCTAATT\n+TTATAAAAAGTCACTAAGCTCAAGTGCTTGGATGAAAGGACAGCGTGGATAAAAAGGTTT\n+TTAAAACATGGATGTTAAGGCTGTTTTGCTTGGAGAAGACTTGGGACTGGGACAGTCTTT\n+AGATATTATTTGAAATGCTGGCACTGTCTATCTGGATCCCAGGGCTTGAACTAGGATTTG\n+AGGAAGTCACAGGGAAGCAGATTTCAGTCTGACATTTATTCAGTGCAAGTTTTTTGGTGC\n+TGTAGTATATGATGAAAGATGTAAAGCTGAATAAAGCATTATTTCTGCCCTAGAGT
TGTT\n+CACAGCCTAGTCAGGCATATGGATATGTAAACAATGACTGTAACGTGTTATAGATGTA'..b'GAAGCTGCCCTCGAGGACCTGGTCTCCACCATTCGAGTCTGAAGATTCT\n+CAGAAGCACAACCAGAGTGAGTATGAGGATTCGGCCGGCGAATGCTGCTCCTGTCCAAAG\n+ACAGACTCTCAGATCCTGAAGGAGCTGGAGGAGTCCTCGTTTAGGAAGACGTTTGAGGAT\n+TACCTGCACAACGTGGTTTTCGTCCCCAGAAAAACCTCTTCAGGCACTGGTGCCGAGGAC\n+CCTAGGCCATCTCGGAAACGCAGGTCCCTTGGCGATGTTGGGAATGTGACGGTGGCCGTG\n+CCCACGGTGGCAGCTTTCCCCAACACTTCCTCGACCAGCGTGCCCACGAGTCCGGAGGAG\n+CACAGGCCTTTTGAGAAGGTGGTGAACAAGGAGTCGCTGGTCATCTCCGGCTTGCGACAC\n+TTCACGGGCTATCGCATCGAGCTGCAGGCTTGCAACCAGGACACCCCTGAGGAACGGTGC\n+AGTGTGGCAGCCTACGTCAGTGCGAGGACCATGCCTGAAGCCAAGGCTGATGACATTGTT\n+GGCCCTGTGACGCATGAAATCTTTGAGAACAACGTCGTCCACTTGATGTGGCAGGAGCCG\n+AAGGAGCCCAATGGTCTGATCGTGCTGTATGAAGTGAGTTATCGGCGATATGGTGATGAG\n+GAGCTGCATCTCTGCGTCTCCCGCAAGCACTTCGCTCTGGAACGGGGCTGCAGGCTGCGT\n+GGGCTGTCACCGGGGAACTACAGCGTGCGAATCCGGGCCACCTCCCTTGCGGGCAACGGC\n+TCTTGGACGGAACCCACCTATTTCTACGTGACAGACTATTTAGACGTCCCGTCAAATATT\n+GCAAAAATTATCATCGGCCCCCTCATCTTTGTCTTTCTCTTCAGTGTTGTGATTGGAAGT\n+ATTTATCTATTCCTGAGAAAGAGGCAGCCAGATGGGCCGCTGGGACCGCTTTACGCTTCT\n+TCAAACCCTGAGTATCTCAGTGCCAGTGATGTGTTTCCATGCTCTGTGTACGTGCCGGAC\n+GAGTGGGAGGTGTCTCGAGAGAAGATCACCCTCCTTCGAGAGCTGGGGCAGGGCTCCTTC\n+GGCATGGTGTATGAGGGCAATGCCAGGGACATCATCAAGGGTGAGGCAGAGACCCGCGTG\n+GCGGTGAAGACGGTCAACGAGTCAGCCAGTCTCCGAGAGCGGATTGAGTTCCTCAATGAG\n+GCCTCGGTCATGAAGGGCTTCACCTGCCATCACGTGGTGCGCCTCCTGGGAGTGGTGTCC\n+AAGGGCCAGCCCACGCTGGTGGTGATGGAGCTGATGGCTCACGGAGACCTGAAGAGCTAC\n+CTCCGTTCTCTGCGGCCAGAGGCTGAGAATAATCCTGGCCGCCCTCCCCCTACCCTTCAA\n+GAGATGATTCAGATGGCGGCAGAGATTGCTGACGGGATGGCCTACCTGAACGCCAAGAAG\n+TTTGTGCATCGGGACCTGGCAGCGAGAAACTGCATGGTCGCCCATGATTTTACTGTCAAA\n+ATTGGAGACTTTGGAATGACCAGAGACATCTATGAAACGGATTACTACCGGAAAGGGGGC\n+AAGGGTCTGCTCCCTGTACGGTGGATGGCACCGGAGTCCCTGAAGGATGGGGTCTTCACC\n+ACTTCTTCTGACATGTGGTCCTTTGGCGTGGTCCTTTGGGAAATCACCAGCTTGGCAGAA\n+CAGCCTTACCAAGGCCTGTCTAATGAACAGGTGTTGAAATTTGTCATGGATGGAGGGTAT\n+CTGGATCAACCCGACAACTGTCCAGAGAGAGTCACTGACCTCATGCGCATGTGCTGGCAA\n+TTCAACCCCAAGATGAGGCCAACCTTCCTGGAGATTGTCAACCTGCTCAAG
GACGACCTG\n+CACCCCAGCTTTCCAGAGGTGTCGTTCTTCCACAGCGAGGAGAACAAGGCTCCCGAGAGT\n+GAGGAGCTGGAGATGGAGTTTGAGGACATGGAGAATGTGCCCCTGGACCGTTCCTCGCAC\n+TGTCAGAGGGAGGAGGCGGGGGGCCGGGATGGAGGGTCCTCGCTGGGTTTCAAGCGGAGC\n+TACGAGGAACACATCCCTTACACACACATGAACGGAGGCAAGAAAAACGGGCGGATTCTG\n+ACCTTGCCTCGGTCCAATCCTTCCTAACAGTGCCTACCGTGGCGGGGGCGGGCAGGGGTT\n+CCCATTTTCGCTTTCCTCTGGTTTGAAAGCCTCTGGAAAACTCAGGATTCTCACGACTCT\n+ACCATGTCCAGTGGAGTTCAGAGATCGTTCCTATACATTTCTGTTCATCTTAAGGTGGAC\n+TCGTTTGGTTACCAATTTAACTAGTCCTGCAGAGGATTTAACTGTGAACCTGGAGGGCAA\n+GGGGTTTCCACAGTTGCTGCTCCTTTGGGGCAACGACGGTTTCAAACCAGGATTTTGTGT\n+TTTTTCGTTccccccacccgcccccAGCAGATGGAAAGAAAGCACCTGTTTTTACAAATT\n+CttttttttttttttttttttttttttttGCTGGTGTCTGAGCTTCAGTATAAAAGACAA\n+AACTTCCTGTTTGTGGAACAAAATTTCGAAAGAAAAAACCAAA\n+>ENA|BC112106|BC112106.1 Homo sapiens rhodopsin, mRNA (cDNA clone MGC:138311 IMAGE:8327574), complete cds\n+CCAGCTGGAGCCCTGAGTGGCTGAGCTCAGGCCTTCGCAGCATTCTTGGGTGGGAGCAGC\n+CACGGGTCAGCCACAAGGGCCACAGCCATGAATGGCACAGAAGGCCCTAACTTCTACGTG\n+CCCTTCTCCAATGCGACGGGTGTGGTACGCAGCCCCTTCGAGTACCCACAGTACTACCTG\n+GCTGAGCCATGGCAGTTCTCCATGCTGGCCGCCTACATGTTTCTGCTGATCGTGCTGGGC\n+TTCCCCATCAACTTCCTCACGCTCTACGTCACCGTCCAGCACAAGAAGCTGCGCACGCCT\n+CTCAACTACATCCTGCTCAACCTAGCCGTGGCTGACCTCTTCATGGTCCTAGGTGGCTTC\n+ACCAGCACCCTCTACACCTCTCTGCATGGATACTTCGTCTTCGGGCCCACAGGATGCAAT\n+TTGGAGGGCTTCTTTGCCACCCTGGGCGGTGAAATTGCCCTGTGGTCCTTGGTGGTCCTG\n+GCCATCGAGCGGTACGTGGTGGTGTGTAAGCCCATGAGCAACTTCCGCTTCGGGGAGAAC\n+CATGCCATCATGGGCGTTGCCTTCACCTGGGTCATGGCGCTGGCCTGCGCCGCACCCCCA\n+CTCGCCGGCTGGTCCAGGTACATCCCCGAGGGCCTGCAGTGCTCGTGTGGAATCGACTAC\n+TACACGCTCAAGCCGGAGGTCAACAACGAGTCTTTTGTCATCTACATGTTCGTGGTCCAC\n+TTCACCATCCCCATGATTATCATCTTTTTCTGCTATGGGCAGCTCGTCTTCACCGTCAAG\n+GAGGCCGCTGCCCAGCAGCAGGAGTCAGCCACCACACAGAAGGCAGAGAAGGAGGTCACC\n+CGCATGGTCATCATCATGGTCATCGCTTTCCTGATCTGCTGGGTGCCCTACGCCAGCGTG\n+GCATTCTACATCTTCACCCACCAGGGCTCCAACTTCGGTCCCATCTTCATGACCATCCCA\n+GCGTTCTTTGCCAAGAGCGCCGCCATCTACAACCCTGTCATCTATATCATGATGAACAAG\n+CAGTTCCGGAACTGCATGCTCACCACCATCTGCTGCGGCAAGAACCCACTGGGTGACGAT\n+GAGGCCT
CTGCTACCGTGTCCAAGACGGAGACGAGCCAGGTGGCCCCGGCCTAAGACCTG\n+CCTAGGACTCTGTGGCCGACTATAGGCGTCTCCCATCCCCTACACCTTCCCCCAGCCACA\n+GCCATCCCACCAG\n'
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/dustmasker_three_human.maskinfo-asn1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dustmasker_three_human.maskinfo-asn1 Thu Dec 05 06:55:59 2013 -0500
b
@@ -0,0 +1,64 @@
+Blast-db-mask-info ::= {
+  algo-id 2,
+  algo-program dust,
+  algo-options "window=64; level=20; linker=1",
+  masks {
+    masks {
+      packed-int {
+        {
+          from 1447,
+          to 1495,
+          id local id 1
+        },
+        {
+          from 1540,
+          to 1552,
+          id local id 1
+        },
+        {
+          from 1886,
+          to 1892,
+          id local id 1
+        },
+        {
+          from 2278,
+          to 2284,
+          id local id 1
+        },
+        {
+          from 4409,
+          to 4415,
+          id local id 1
+        },
+        {
+          from 4635,
+          to 4653,
+          id local id 1
+        },
+        {
+          from 4726,
+          to 4734,
+          id local id 1
+        }
+      },
+      packed-int {
+        {
+          from 139,
+          to 219,
+          id local id 2
+        },
+        {
+          from 4569,
+          to 4584,
+          id local id 2
+        },
+        {
+          from 4621,
+          to 4648,
+          id local id 2
+        }
+      }
+    },
+    more FALSE
+  }
+}
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/dustmasker_three_human.maskinfo-asn1-binary
b
Binary file test-data/dustmasker_three_human.maskinfo-asn1-binary has changed
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta.log Thu Dec 05 06:55:59 2013 -0500
b
@@ -0,0 +1,10 @@
+
+
+Building a new DB, current time: 11/21/2013 11:16:27
+New DB name:   /tmp/tmpnSjpCP/tmpwAbNo4/database/files/000/dataset_2_files/blastdb
+New DB title:  Just 4 human proteins
+Sequence type: Protein
+Keep Linkouts: T
+Keep MBits: T
+Maximum file size: 1000000000B
+Adding sequences from FASTA; added 4 sequences in 0.00202417 seconds.
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.phd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta.phd Thu Dec 05 06:55:59 2013 -0500
b
@@ -0,0 +1,4 @@
+11117184492
+29249033410
+36665887501
+5392473183
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.phi
b
Binary file test-data/four_human_proteins.fasta.phi has changed
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.phr
b
Binary file test-data/four_human_proteins.fasta.phr has changed
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.pin
b
Binary file test-data/four_human_proteins.fasta.pin has changed
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.pog
b
Binary file test-data/four_human_proteins.fasta.pog has changed
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.psd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta.psd Thu Dec 05 06:55:59 2013 -0500
b
@@ -0,0 +1,4 @@
+gnl|bl_ord_id|00
+gnl|bl_ord_id|11
+gnl|bl_ord_id|22
+gnl|bl_ord_id|33
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.psi
b
Binary file test-data/four_human_proteins.fasta.psi has changed
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/four_human_proteins.fasta.psq
b
Binary file test-data/four_human_proteins.fasta.psq has changed
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/tblastn_four_human_vs_rhodopsin.html
--- a/test-data/tblastn_four_human_vs_rhodopsin.html Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/tblastn_four_human_vs_rhodopsin.html Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -3,7 +3,7 @@\n <BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099">\n <PRE>\n \n-<b>TBLASTN 2.2.26+</b>\n+<b>TBLASTN 2.2.28+</b>\n \n \n <b>Query=</b> sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44\n@@ -20,12 +20,12 @@\n \n \n \n-Lambda     K      H\n-   0.347    0.182    0.684 \n+Lambda      K        H        a         alpha\n+   0.347    0.182    0.684    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 127710\n \n@@ -45,12 +45,12 @@\n \n \n \n-Lambda     K      H\n-   0.347    0.182    0.684 \n+Lambda      K        H        a         alpha\n+   0.347    0.182    0.684    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 127710\n \n@@ -70,12 +70,12 @@\n \n \n \n-Lambda     K      H\n-   0.347    0.182    0.684 \n+Lambda      K        H        a         alpha\n+   0.347    0.182    0.684    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 127710\n \n@@ -95,12 +95,12 @@\n \n \n \n-Lambda     K      H\n-   0.347    0.182    0.684 \n+Lambda      K        H        a         alpha\n+   0.347    0.182    0.684    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 127710\n \n@@ -120,12 +120,12 @@\n \n \n \n-Lambda     K      H\n-   0.347    0.182    0.684 \n+Lambda      K        H        a         alpha\n+   0.347    0.182    0.684 
   0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 127710\n \n@@ -145,12 +145,12 @@\n \n \n \n-Lambda     K      H\n-   0.347    0.182    0.684 \n+Lambda      K        H        a         alpha\n+   0.347    0.182    0.684    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 127710\n \n@@ -169,12 +169,12 @@\n \n \n \n-Lambda     K      H\n-   0.334    0.170    0.615 \n+Lambda      K        H        a         alpha\n+   0.334    0.170    0.615    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 370988\n \n@@ -194,12 +194,12 @@\n \n \n \n-Lambda     K      H\n-   0.334    0.170    0.615 \n+Lambda      K        H        a         alpha\n+   0.334    0.170    0.615    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 370988\n \n@@ -219,12 +219,12 @@\n \n \n \n-Lambda     K      H\n-   0.334    0.170    0.615 \n+Lambda      K        H        a         alpha\n+   0.334    0.170    0.615    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 370988\n \n@@ -244,12 +244,12 @@\n \n \n \n-Lambda     K      H\n-   0.334    0.170    0.615 \n+Lambda      K        H        a         alpha\n+   0.334    
0.170    0.615    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.2'..b'bda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 441350\n \n@@ -368,12 +368,12 @@\n \n \n \n-Lambda     K      H\n-   0.346    0.180    0.700 \n+Lambda      K        H        a         alpha\n+   0.346    0.180    0.700    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 441350\n \n@@ -393,12 +393,12 @@\n \n \n \n-Lambda     K      H\n-   0.346    0.180    0.700 \n+Lambda      K        H        a         alpha\n+   0.346    0.180    0.700    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 441350\n \n@@ -418,12 +418,12 @@\n \n \n \n-Lambda     K      H\n-   0.346    0.180    0.700 \n+Lambda      K        H        a         alpha\n+   0.346    0.180    0.700    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 441350\n \n@@ -443,12 +443,12 @@\n \n \n \n-Lambda     K      H\n-   0.346    0.180    0.700 \n+Lambda      K        H        a         alpha\n+   0.346    0.180    0.700    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 441350\n \n@@ -492,12 +492,12 @@\n \n \n \n-Lambda     K      
H\n-   0.351    0.182    0.707 \n+Lambda      K        H        a         alpha\n+   0.351    0.182    0.707    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 109230\n \n@@ -542,12 +542,12 @@\n \n \n \n-Lambda     K      H\n-   0.351    0.182    0.707 \n+Lambda      K        H        a         alpha\n+   0.351    0.182    0.707    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 109230\n \n@@ -616,12 +616,12 @@\n \n \n \n-Lambda     K      H\n-   0.351    0.182    0.707 \n+Lambda      K        H        a         alpha\n+   0.351    0.182    0.707    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 109230\n \n@@ -666,12 +666,12 @@\n \n \n \n-Lambda     K      H\n-   0.351    0.182    0.707 \n+Lambda      K        H        a         alpha\n+   0.351    0.182    0.707    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 109230\n \n@@ -716,12 +716,12 @@\n \n \n \n-Lambda     K      H\n-   0.351    0.182    0.707 \n+Lambda      K        H        a         alpha\n+   0.351    0.182    0.707    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 109230\n \n@@ -766,12 +766,12 @@\n \n \n \n-Lambda  
   K      H\n-   0.351    0.182    0.707 \n+Lambda      K        H        a         alpha\n+   0.351    0.182    0.707    0.522     1.92 \n \n Gapped\n-Lambda     K      H\n-   0.299   0.0710    0.270 \n+Lambda      K        H        a         alpha    sigma\n+   0.299   0.0710    0.270     1.10     13.8     14.5 \n \n Effective search space used: 109230\n \n'
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/tblastn_four_human_vs_rhodopsin.xml
--- a/test-data/tblastn_four_human_vs_rhodopsin.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/tblastn_four_human_vs_rhodopsin.xml Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -2,7 +2,7 @@\n <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n <BlastOutput>\n   <BlastOutput_program>tblastn</BlastOutput_program>\n-  <BlastOutput_version>TBLASTN 2.2.26+</BlastOutput_version>\n+  <BlastOutput_version>TBLASTN 2.2.28+</BlastOutput_version>\n   <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n   <BlastOutput_db></BlastOutput_db>\n   <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n@@ -17,706 +17,725 @@\n       <Parameters_filter>F</Parameters_filter>\n     </Parameters>\n   </BlastOutput_param>\n-  <BlastOutput_iterations>\n-    <Iteration>\n-      <Iteration_iter-num>1</Iteration_iter-num>\n-      <Iteration_query-ID>Query_1</Iteration_query-ID>\n-      <Iteration_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n-      <Iteration_query-len>406</Iteration_query-len>\n-      <Iteration_hits></Iteration_hits>\n-      <Iteration_stat>\n-        <Statistics>\n-          <Statistics_db-num>0</Statistics_db-num>\n-          <Statistics_db-len>0</Statistics_db-len>\n-          <Statistics_hsp-len>19</Statistics_hsp-len>\n-          <Statistics_eff-space>127710</Statistics_eff-space>\n-          <Statistics_kappa>0.071</Statistics_kappa>\n-          <Statistics_lambda>0.299</Statistics_lambda>\n-          <Statistics_entropy>0.27</Statistics_entropy>\n-        </Statistics>\n-      </Iteration_stat>\n-      <Iteration_message>No hits found</Iteration_message>\n-    </Iteration>\n-    <Iteration>\n-      <Iteration_iter-num>2</Iteration_iter-num>\n-      <Iteration_query-ID>Query_1</Iteration_query-ID>\n-      
<Iteration_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n-      <Iteration_query-len>406</Iteration_query-len>\n-      <Iteration_hits></Iteration_hits>\n-      <Iteration_stat>\n-        <Statistics>\n-          <Statistics_db-num>0</Statistics_db-num>\n-          <Statistics_db-len>0</Statistics_db-len>\n-          <Statistics_hsp-len>19</Statistics_hsp-len>\n-          <Statistics_eff-space>127710</Statistics_eff-space>\n-          <Statistics_kappa>0.071</Statistics_kappa>\n-          <Statistics_lambda>0.299</Statistics_lambda>\n-          <Statistics_entropy>0.27</Statistics_entropy>\n-        </Statistics>\n-      </Iteration_stat>\n-      <Iteration_message>No hits found</Iteration_message>\n-    </Iteration>\n-    <Iteration>\n-      <Iteration_iter-num>3</Iteration_iter-num>\n-      <Iteration_query-ID>Query_1</Iteration_query-ID>\n-      <Iteration_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1</Iteration_query-def>\n-      <Iteration_query-len>406</Iteration_query-len>\n-      <Iteration_hits></Iteration_hits>\n-      <Iteration_stat>\n-        <Statistics>\n-          <Statistics_db-num>0</Statistics_db-num>\n-          <Statistics_db-len>0</Statistics_db-len>\n-          <Statistics_hsp-len>19</Statistics_hsp-len>\n-          <Statistics_eff-space>127710</Statistics_eff-space>\n-          <Statistics_kappa>0.071</Statistics_kappa>\n-          <Statistics_lambda>0.299</Statistics_lambda>\n-          <Statistics_entropy>0.27</Statistics_entropy>\n-        </Statistics>\n-      </Iteration_stat>\n-      <Iteration_message>No hits found</Iteration_message>\n-    </Iteration>\n-    <Iteration>\n-      <Iteration_iter-num>4</Iteration_iter-num>\n-      <Iteration_query-ID>Query_1</Iteration_query-ID>\n-      <Iteration_query-def>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens 
GN=ERP44 PE=1 SV=1</Iteration_query-def>\n-      <Iteration_query-len>406</Iteration_query-len>\n-      <Iter'..b'YYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA</Hsp_hseq>\n+      <Hsp_midline>MNGTEGPNFYVPFSN TGVVRSPFE PQYYLAEPWQFSMLAAYMFLLI+LGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMV GGFT+TLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPL GWSRYIPEG+QCSCGIDYYT   E NNESFVIYMFVVHF IP+I+IFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICW+PYA VAFYIFTHQGS+FGPIFMTIPAFFAK++A+YNPVIYIMMNKQFRNCM+TT+CCGKNPLGDDEAS TVSKTETSQVAPA</Hsp_midline>\n+    </Hsp>\n+  </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>18</Statistics_hsp-len>\n+      <Statistics_eff-space>109230</Statistics_eff-space>\n+      <Statistics_kappa>0.071</Statistics_kappa>\n+      <Statistics_lambda>0.299</Statistics_lambda>\n+      <Statistics_entropy>0.27</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+</Iteration>\n+<Iteration>\n+  <Iteration_iter-num>24</Iteration_iter-num>\n+  <Iteration_query-ID>Query_4</Iteration_query-ID>\n+  <Iteration_query-def>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1</Iteration_query-def>\n+  <Iteration_query-len>348</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+  <Hit_num>1</Hit_num>\n+  <Hit_id>Subject_6</Hit_id>\n+  <Hit_def>gi|12583664|dbj|AB043817.1| Conger myriaster conf gene for fresh water form rod opsin, complete cds</Hit_def>\n+  <Hit_accession>Subject_6</Hit_accession>\n+  <Hit_len>1344</Hit_len>\n+  <Hit_hsps>\n+    <Hsp>\n+      <Hsp_num>1</Hsp_num>\n+      
<Hsp_bit-score>626.708</Hsp_bit-score>\n+      <Hsp_score>1444</Hsp_score>\n+      <Hsp_evalue>0</Hsp_evalue>\n+      <Hsp_query-from>1</Hsp_query-from>\n+      <Hsp_query-to>341</Hsp_query-to>\n+      <Hsp_hit-from>23</Hsp_hit-from>\n+      <Hsp_hit-to>1048</Hsp_hit-to>\n+      <Hsp_query-frame>0</Hsp_query-frame>\n+      <Hsp_hit-frame>2</Hsp_hit-frame>\n+      <Hsp_identity>281</Hsp_identity>\n+      <Hsp_positive>311</Hsp_positive>\n+      <Hsp_gaps>1</Hsp_gaps>\n+      <Hsp_align-len>342</Hsp_align-len>\n+      <Hsp_qseq>MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE</Hsp_qseq>\n+      <Hsp_hseq>MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE</Hsp_hseq>\n+      <Hsp_midline>MNGTEGPNFY+P SNATGVVRSPFEYPQYYLAEPW FS L+AYMF LI+ GFPINFLTLYVT++HKKLRTPLNYILLNLAVADLFMV GGFT+T+YTS+HGYFVFGPTGCN+EGFFATLGGEIALW LVVLAIER++VVCKP++NFRFGE HAIMGV  TW MALACA PPL GWSRYIPEGLQCSCGIDYYT  P +NNESFVIYMF  HF+IP+ +I FCYG+LV TVKEAAAQQQES TTQ+AE+EVTRMV+IMVI+FL+CWVPYASVA YIFTHQGS FGPIFMTIP+FFAKS+A+YNP+IYI MNKQFR CM+TT+CCGKNP   +D ASAT SKTE</Hsp_midline>\n+    </Hsp>\n+  </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>0</Statistics_db-num>\n+      <Statistics_db-len>0</Statistics_db-len>\n+      <Statistics_hsp-len>18</Statistics_hsp-len>\n+      <Statistics_eff-space>109230</Statistics_eff-space>\n+      <Statistics_kappa>0.071</Statistics_kappa>\n+   
   <Statistics_lambda>0.299</Statistics_lambda>\n+      <Statistics_entropy>0.27</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n'
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff test-data/tblastn_four_human_vs_rhodopsin_ext.tabular
--- a/test-data/tblastn_four_human_vs_rhodopsin_ext.tabular Mon Sep 23 06:14:13 2013 -0400
+++ b/test-data/tblastn_four_human_vs_rhodopsin_ext.tabular Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -1,10 +1,10 @@\n-sp|P08100|OPSD_HUMAN\tgi|57163782|ref|NM_001009242.1|\t96.55\t348\t12\t0\t1\t348\t1\t1044\t0.0\t 732\tgi|57163782|ref|NM_001009242.1|\t1689\t336\t343\t0\t98.56\t0\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\t348\t1047\n-sp|P08100|OPSD_HUMAN\tgi|2734705|gb|U59921.1|BBU59921\t84.80\t342\t51\t1\t1\t341\t42\t1067\t0.0\t 646\tgi|2734705|gb|U59921.1|BBU59921\t1489\t290\t320\t1\t93.57\t0\t3\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE\t348\t1574\n-sp|P08100|OPSD_HUMAN\tgi|283855845|gb|GQ290303.1|\t93.24\t74\t5\t0\t239\t312\t3147\t3368\t1e-72\t 
151\tgi|283855845|gb|GQ290303.1|\t342\t69\t73\t0\t98.65\t0\t3\tESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ\tESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ\t348\t4301\n-sp|P08100|OPSD_HUMAN\tgi|283855845|gb|GQ290303.1|\t91.53\t59\t5\t0\t177\t235\t2855\t3031\t1e-72\t 126\tgi|283855845|gb|GQ290303.1|\t284\t54\t57\t0\t96.61\t0\t2\tRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA\tRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS\t348\t4301\n-sp|P08100|OPSD_HUMAN\tgi|283855845|gb|GQ290303.1|\t96.40\t111\t4\t0\t11\t121\t1\t333\t9e-67\t 229\tgi|283855845|gb|GQ290303.1|\t523\t107\t109\t0\t98.20\t0\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG\t348\t4301\n-sp|P08100|OPSD_HUMAN\tgi|283855845|gb|GQ290303.1|\t93.22\t59\t4\t0\t119\t177\t1404\t1580\t1e-32\t 122\tgi|283855845|gb|GQ290303.1|\t276\t55\t56\t0\t94.92\t0\t3\tLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR\tLAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR\t348\t4301\n-sp|P08100|OPSD_HUMAN\tgi|283855845|gb|GQ290303.1|\t88.46\t26\t3\t0\t312\t337\t4222\t4299\t2e-12\t57.7\tgi|283855845|gb|GQ290303.1|\t125\t23\t24\t0\t92.31\t0\t1\tQFRNCMLTTICCGKNPLGDDEASATV\tQFRNCMLTTLCCGKNPLGDDEASTTA\t348\t4301\n-sp|P08100|OPSD_HUMAN\tgi|283855822|gb|GQ290312.1|\t95.09\t326\t16\t0\t11\t336\t1\t978\t0.0\t 
658\tgi|283855822|gb|GQ290312.1|\t1517\t310\t322\t0\t98.77\t0\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT\t348\t983\n-sp|P08100|OPSD_HUMAN\tgi|18148870|dbj|AB062417.1|\t93.39\t348'..b'KSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE\t348\t1574\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|283855845|gb|GQ290303.1|\t93.24\t74\t5\t0\t239\t312\t3147\t3368\t1e-72\t 151\tgi|283855845|gb|GQ290303.1|\t342\t69\t73\t0\t98.65\t0\t3\tESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQ\tESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQ\t348\t4301\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|283855845|gb|GQ290303.1|\t91.53\t59\t5\t0\t177\t235\t2855\t3031\t1e-72\t 126\tgi|283855845|gb|GQ290303.1|\t284\t54\t57\t0\t96.61\t0\t2\tRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAA\tRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEVRS\t348\t4301\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|283855845|gb|GQ290303.1|\t96.40\t111\t4\t0\t11\t121\t1\t333\t9e-67\t 229\tgi|283855845|gb|GQ290303.1|\t523\t107\t109\t0\t98.20\t0\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGG\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGG\t348\t4301\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|283855845|gb|GQ290303.1|\t93.22\t59\t4\t0\t119\t177\t1404\t1580\t1e-32\t 
122\tgi|283855845|gb|GQ290303.1|\t276\t55\t56\t0\t94.92\t0\t3\tLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSR\tLAGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSR\t348\t4301\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|283855845|gb|GQ290303.1|\t88.46\t26\t3\t0\t312\t337\t4222\t4299\t2e-12\t57.7\tgi|283855845|gb|GQ290303.1|\t125\t23\t24\t0\t92.31\t0\t1\tQFRNCMLTTICCGKNPLGDDEASATV\tQFRNCMLTTLCCGKNPLGDDEASTTA\t348\t4301\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|283855822|gb|GQ290312.1|\t95.09\t326\t16\t0\t11\t336\t1\t978\t0.0\t 658\tgi|283855822|gb|GQ290312.1|\t1517\t310\t322\t0\t98.77\t0\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASAT\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTT\t348\t983\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|18148870|dbj|AB062417.1|\t93.39\t348\t23\t0\t1\t348\t1\t1044\t0.0\t 
711\tgi|18148870|dbj|AB062417.1|\t1640\t325\t337\t0\t96.84\t0\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\t348\t1047\tN/A\n+sp|P08100|OPSD_HUMAN\tgi|12583664|dbj|AB043817.1|\t82.16\t342\t60\t1\t1\t341\t23\t1048\t0.0\t 626\tgi|12583664|dbj|AB043817.1|\t1444\t281\t311\t1\t90.94\t0\t2\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE\t348\t1344\tN/A\n'
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/README.rst
--- a/tools/ncbi_blast_plus/README.rst Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/README.rst Thu Dec 05 06:55:59 2013 -0500
b
@@ -5,12 +5,12 @@
 (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
 See the licence text below.
 
-Currently tested with NCBI BLAST 2.2.26+ (i.e. version 2.2.26 of BLAST+),
-and does not work with the NCBI 'legacy' BLAST suite (e.g. blastall).
+Currently tested with NCBI BLAST 2.2.28+ (i.e. version 2.2.28 of BLAST+),
+and does not work with the NCBI 'legacy' BLAST suite (e.g. ``blastall``).
 
 Note that these wrappers (and the associated datatypes) were originally
 distributed as part of the main Galaxy repository, but as of August 2012
-moved to the Galaxy Tool Shed as 'ncbi_blast_plus' (and 'blast_datatypes').
+moved to the Galaxy Tool Shed as ``ncbi_blast_plus`` (and ``blast_datatypes``).
 My thanks to Dannon Baker from the Galaxy development team for his assistance
 with this.
 
@@ -22,9 +22,9 @@
 ======================
 
 Galaxy should be able to automatically install the dependencies, i.e. the
-'blast_datatypes' repository which defines the BLAST XML file format
-('blastxml') and protein and nucleotide BLAST databases ('blastdbp' and
-'blastdbn').
+``blast_datatypes`` repository which defines the BLAST XML file format
+(``blastxml``) and protein and nucleotide BLAST databases (``blastdbp`` and
+``blastdbn``).
 
 You must tell Galaxy about any system level BLAST databases using configuration
 files blastdb.loc (nucleotide databases like NT) and blastdb_p.loc (protein
@@ -42,9 +42,9 @@
 ===================
 
 For those not using Galaxy's automated installation from the Tool Shed, put
-the XML and Python files in the tools/ncbi_blast_plus/ folder and add the XML
-files to your tool_conf.xml as normal (and do the same in tool_conf.xml.sample
-in order to run the unit tests). For example, use::
+the XML and Python files in the ``tools/ncbi_blast_plus/`` folder and add the
+XML files to your ``tool_conf.xml`` as normal (and do the same in
+``tool_conf.xml.sample`` in order to run the unit tests). For example, use::
 
   <section name="NCBI BLAST+" id="ncbi_blast_plus_tools">
     <tool file="ncbi_blast_plus/ncbi_blastn_wrapper.xml" />
@@ -53,6 +53,7 @@
     <tool file="ncbi_blast_plus/ncbi_tblastn_wrapper.xml" />
     <tool file="ncbi_blast_plus/ncbi_tblastx_wrapper.xml" />
     <tool file="ncbi_blast_plus/ncbi_makeblastdb.xml" />
+    <tool file="ncbi_blast_plus/ncbi_dustmasker_wrapper.xml" />
     <tool file="ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml" />
     <tool file="ncbi_blast_plus/ncbi_blastdbcmd_info.xml" />
     <tool file="ncbi_blast_plus/ncbi_rpsblast_wrapper.xml" />
@@ -60,18 +61,21 @@
     <tool file="ncbi_blast_plus/blastxml_to_tabular.xml" />
   </section>
 
-You will also need to install 'blast_datatypes' from the Tool Shed. This
-defines the BLAST XML file format ('blastxml') and protein and nucleotide
-BLAST databases composite file formats ('blastdbp' and 'blastdbn').
+You will also need to install ``blast_datatypes`` from the Tool Shed. This
+defines the BLAST XML file format (``blastxml``) and protein and nucleotide
+BLAST databases composite file formats (``blastdbp`` and ``blastdbn``):
+
+* http://toolshed.g2.bx.psu.edu/view/devteam/blast_datatypes
 
 As described above for an automated installation, you must also tell Galaxy
-about any system level BLAST databases using the tool-data/blastdb*.loc files.
+about any system level BLAST databases using the ``tool-data/blastdb*.loc``
+files.
 
 You must install the NCBI BLAST+ standalone tools somewhere on the system
-path. Currently the unit tests are written using "BLAST 2.2.26+".
+path. Currently the unit tests are written using "BLAST 2.2.28+".
 
 Run the functional tests (adjusting the section identifier to match your
-tool_conf.xml.sample file)::
+``tool_conf.xml.sample`` file)::
 
     ./run_functional_tests.sh -sid NCBI_BLAST+-ncbi_blast_plus_tools
 
@@ -117,6 +121,21 @@
         - Adopted standard MIT License.
         - Development moved to GitHub, https://github.com/peterjc/galaxy_blast
         - Updated citation information (Cock et al. 2013).
+v0.0.21 - Use macros to simplify the XML wrappers.
+        - Added wrapper for dustmasker
+        - Enabled masking for makeblastdb
+        - Requires 'maskinfo-asn1' and 'maskinfo-asn1-binary' datatypes
+          defined in updated blast_datatypes on Galaxy ToolShed.
+        - Tests updated for BLAST+ 2.2.27 instead of BLAST+ 2.2.26
+        - Now depends on package_blast_plus_2_2_27 in ToolShed
+v0.0.22 - More use of macros to simplify the wrappers
+        - Set number of threads via $GALAXY_SLOTS environment variable
+        - More descriptive default output names
+        - Tests require updated BLAST DB definitions (blast_datatypes v0.0.18)
+        - Pre-check for duplicate identifiers in makeblastdb wrapper.
+        - Tests updated for BLAST+ 2.2.28 instead of BLAST+ 2.2.27
+        - Now depends on package_blast_plus_2_2_28 in ToolShed
+        - Extended tabular output includes 'salltitles' as column 25.
 ======= ======================================================================
 
 
@@ -140,11 +159,16 @@
 For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball I use
 the following command from the GitHub repository root folder::
 
-    $ ./ncbi_blast_plus/make_ncbi_blast_plus.sh
+    $ tools/ncbi_blast_plus/make_ncbi_blast_plus.sh
 
 This simplifies ensuring a consistent set of files is bundled each time,
 including all the relevant test files.
 
+When updating the version of BLAST+, many of the sample data files used for
+the unit tests must be regenerated. This script automates that task::
+
+    $ tools/ncbi_blast_plus/update_test_files.sh
+
 
 Licence (MIT)
 =============
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/blastxml_to_tabular.py
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Thu Dec 05 06:55:59 2013 -0500
[
@@ -31,7 +31,7 @@
 ====== ============= ===========================================
 Column NCBI name     Description
 ------ ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    13 sallseqid     All subject Seq-id(s), separated by ';'
     14 score         Raw score
     15 nident        Number of identical matches
     16 positive      Number of positive-scoring matches
@@ -43,6 +43,7 @@
     22 sseq          Aligned part of subject sequence
     23 qlen          Query sequence length
     24 slen          Subject sequence length
+    25 salltitles    All subject title(s), separated by '<>'
 ====== ============= ===========================================
 
 Most of these fields are given explicitly in the XML file, others some like
@@ -63,7 +64,7 @@
 import re
 
 if "-v" in sys.argv or "--version" in sys.argv:
-    print "v0.0.12"
+    print "v0.0.22"
     sys.exit(0)
 
 if sys.version_info[:2] >= ( 2, 5 ):
@@ -89,11 +90,11 @@
 if out_fmt == "std":
     extended = False
 elif out_fmt == "x22":
-    stop_err("Format argument x22 has been replaced with ext (extended 24 columns)")
+    stop_err("Format argument x22 has been replaced with ext (extended 25 columns)")
 elif out_fmt == "ext":
     extended = True
 else:
-    stop_err("Format argument should be std (12 column) or ext (extended 24 columns)")
+    stop_err("Format argument should be std (12 column) or ext (extended 25 columns), not: %r" % out_fmt)
 
 
 # get an iterable
@@ -157,6 +158,11 @@
             # <Hit_accession>Subject_1</Hit_accession>
             #
             #apparently depending on the parse_deflines switch
+            #
+            #Or, with BLAST 2.2.28+ can get this,
+            # <Hit_id>gnl|BL_ORD_ID|2</Hit_id>
+            # <Hit_def>chrIII gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence</Hit_def>
+            # <Hit_accession>2</Hit_accession>
             sseqid = hit.findtext("Hit_id").split(None,1)[0]
             hit_def = sseqid + " " + hit.findtext("Hit_def")
             if re_default_subject_id.match(sseqid) \
@@ -164,6 +170,11 @@
                 #Place holder ID, take the first word of the subject definition
                 hit_def = hit.findtext("Hit_def")
                 sseqid = hit_def.split(None,1)[0]
+            if sseqid.startswith("gnl|BL_ORD_ID|") \
+            and sseqid == "gnl|BL_ORD_ID|" + hit.findtext("Hit_accession"):
+                #Alternative place holder ID, again take the first word of hit_def
+                hit_def = hit.findtext("Hit_def")
+                sseqid = hit_def.split(None,1)[0]
             # for every <Hsp> within <Hit>
             for hsp in hit.findall("Hit_hsps/Hsp"):
                 nident = hsp.findtext("Hsp_identity")
@@ -228,7 +239,11 @@
                           ]
 
                 if extended:
-                    sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(">"))
+                    try:
+                        sallseqid = ";".join(name.split(None,1)[0] for name in hit_def.split(" >"))
+                        salltitles = "<>".join(name.split(None,1)[1] for name in hit_def.split(" >"))
+                    except IndexError as e:
+                        stop_err("Problem splitting multiple hits?\n%r\n--> %s" % (hit_def, e))
                     #print hit_def, "-->", sallseqid
                     positive = hsp.findtext("Hsp_positive")
                     ppos = "%0.2f" % (100*float(positive)/float(length))
@@ -252,6 +267,7 @@
                                    h_seq,
                                    str(qlen),
                                    str(slen),
+                                   salltitles,
                                    ])
                 #print "\t".join(values) 
                 outfile.write("\t".join(values) + "\n")
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/blastxml_to_tabular.xml
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Thu Dec 05 06:55:59 2013 -0500
b
@@ -1,4 +1,4 @@
-<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.0.11">
+<tool id="blastxml_to_tabular" name="BLAST XML to tabular" version="0.0.22">
     <description>Convert BLAST XML output to tabular</description>
     <version_command interpreter="python">blastxml_to_tabular.py --version</version_command>
     <command interpreter="python">
@@ -17,7 +17,7 @@
         </param>
     </inputs>
     <outputs>
-        <data name="tabular_file" format="tabular" label="BLAST results as tabular" />
+        <data name="tabular_file" format="tabular" label="$blastxml_file.display_name (as tabular)" />
     </outputs>
     <requirements>
     </requirements>
@@ -70,6 +70,16 @@
             <!-- Note this has some white space differences from the actual blastp output -->
             <output name="tabular_file" file="blastp_human_vs_pdb_seg_no_converted_ext.tabular" ftype="tabular" />
         </test>
+        <test>
+            <param name="blastxml_file" value="blastn_arabidopsis.xml" ftype="blastxml" />
+            <param name="out_format" value="std" />
+            <output name="tabular_file" file="blastn_arabidopsis.standard.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="blastxml_file" value="blastn_arabidopsis.xml" ftype="blastxml" />
+            <param name="out_format" value="ext" />
+            <output name="tabular_file" file="blastn_arabidopsis.extended.tabular" ftype="tabular" />
+        </test>
     </tests>
     <help>
     
@@ -104,13 +114,13 @@
 but this takes longer to calculate. Most (but not all) of these columns are
 included by selecting the extended tabular output. The extra columns are
 included *after* the standard 12 columns. This is so that you can write
-workflow filtering steps that accept either the 12 or 22 column tabular
-BLAST output. This tool now uses this extended 24 column output by default.
+workflow filtering steps that accept either the 12 or 25 column tabular
+BLAST output. This tool now uses this extended 25 column output by default.
 
 ====== ============= ===========================================
 Column NCBI name     Description
 ------ ------------- -------------------------------------------
-    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    13 sallseqid     All subject Seq-id(s), separated by ';'
     14 score         Raw score
     15 nident        Number of identical matches
     16 positive      Number of positive-scoring matches
@@ -122,6 +132,7 @@
     22 sseq          Aligned part of subject sequence
     23 qlen          Query sequence length
     24 slen          Subject sequence length
+    25 salltitles    All subject title(s), separated by '&lt;&gt;'
 ====== ============= ===========================================
 
 Beware that the XML file (and thus the conversion) and the tabular output
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/check_no_duplicates.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/check_no_duplicates.py Thu Dec 05 06:55:59 2013 -0500
[
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+"""Check for duplicate sequence identifiers in FASTA files.
+
+This is run as a pre-check before makeblastdb, in order to avoid
+a regression bug in BLAST+ 2.2.28 which fails to catch this. See:
+http://blastedbio.blogspot.co.uk/2012/10/my-ids-not-good-enough-for-ncbi-blast.html
+
+This script takes one or more FASTA filenames as input, and exits
+with a non-zero return code if any duplicate identifiers are found
+(1), an input file is missing (2), or no filenames are given (3).
+"""
+import sys
+import os
+
+if "-v" in sys.argv or "--version" in sys.argv:
+    print("v0.0.22")
+    sys.exit(0)
+
+def stop_err(msg, error=1):
+    sys.stderr.write("%s\n" % msg)
+    sys.exit(error)
+
+
+identifiers = set()
+files = 0
+for filename in sys.argv[1:]:
+    if not os.path.isfile(filename):
+        stop_err("Missing FASTA file %r" % filename, 2)
+    files += 1
+    handle = open(filename)
+    for line in handle:
+        if line.startswith(">"):
+            #The split will also take care of the new line character,
+            #e.g. ">test\n" and ">test description here\n" both give "test"
+            seq_id = line[1:].split(None, 1)[0]
+            if seq_id in identifiers:
+                handle.close()
+                stop_err("Repeated identifiers, e.g. %r" % seq_id, 1)
+            identifiers.add(seq_id)
+    handle.close()
+if not files:
+    stop_err("No FASTA files given to check for duplicates", 3)
+elif files == 1:
+    print("%i sequences" % len(identifiers))
+else:
+    print("%i sequences in %i FASTA files" % (len(identifiers), files))
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml Thu Dec 05 06:55:59 2013 -0500
b
@@ -1,46 +1,16 @@
-<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.6">
+<tool id="ncbi_blastdbcmd_info" name="NCBI BLAST+ database info" version="0.0.22">
     <description>Show BLAST database information from blastdbcmd</description>
-    <requirements>
-        <requirement type="binary">blastdbcmd</requirement>
-        <requirement type="package" version="2.2.26+">blast+</requirement>
-    </requirements>
-    <version_command>blastdbcmd -version</version_command>
+    <macros>
+        <token name="@BINARY@">blastdbcmd</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
     <command>
 blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" -info -out "$info"
     </command>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
- <!-- Suspect blastdbcmd sometimes fails to set error level -->
- <regex match="Error:" />
- <regex match="Exception:" />
-    </stdio>
+    <expand macro="stdio" />
     <inputs>
-        <conditional name="db_opts">
-            <param name="db_type" type="select" label="Type of BLAST database">
-              <option value="nucl" selected="True">Nucleotide</option>
-              <option value="prot">Protein</option>
-            </param>
-            <when value="nucl">
-                <param name="database" type="select" label="Nucleotide BLAST database">
-                    <options from_file="blastdb.loc">
-                      <column name="value" index="0"/>
-                      <column name="name" index="1"/>
-                      <column name="path" index="2"/>
-                    </options>
-                </param>
-            </when>
-            <when value="prot">
-                <param name="database" type="select" label="Protein BLAST database">
-                    <options from_file="blastdb_p.loc">
-                      <column name="value" index="0"/>
-                      <column name="name" index="1"/>
-                      <column name="path" index="2"/>
-                    </options>
-                </param>
-            </when>
-        </conditional>
+        <expand macro="input_conditional_choose_db_type" />
     </inputs>
     <outputs>
         <data name="info" format="txt" label="${db_opts.database.fields.name} info" />
@@ -60,17 +30,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers:
 
-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Thu Dec 05 06:55:59 2013 -0500
[
@@ -1,10 +1,10 @@
-<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.6">
+<tool id="ncbi_blastdbcmd_wrapper" name="NCBI BLAST+ blastdbcmd entry(s)" version="0.0.22">
     <description>Extract sequence(s) from BLAST database</description>
-    <requirements>
-        <requirement type="binary">blastdbcmd</requirement>
-        <requirement type="package" version="2.2.26+">blast+</requirement>
-    </requirements>
-    <version_command>blastdbcmd -version</version_command>
+    <macros>
+        <token name="@BINARY@">blastdbcmd</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
     <command>
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
@@ -47,39 +47,9 @@
 | sed 's/>\(lcl|\|gnl|BL_ORD_ID|[0-9]* \)/>/1' > "$seq"
 #end if
     </command>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
- <!-- Suspect blastdbcmd sometimes fails to set error level -->
- <regex match="Error:" />
- <regex match="Exception:" />
-    </stdio>
+    <expand macro="stdio" />
     <inputs>
-        <conditional name="db_opts">
-            <param name="db_type" type="select" label="Type of BLAST database">
-              <option value="nucl" selected="True">Nucleotide</option>
-              <option value="prot">Protein</option>
-            </param>
-            <when value="nucl">
-                <param name="database" type="select" label="Nucleotide BLAST database">
-                    <options from_file="blastdb.loc">
-                      <column name="value" index="0"/>
-                      <column name="name" index="1"/>
-                      <column name="path" index="2"/>
-                    </options>
-                </param>
-            </when>
-            <when value="prot">
-                <param name="database" type="select" label="Protein BLAST database">
-                    <options from_file="blastdb_p.loc">
-                      <column name="value" index="0"/>
-                      <column name="name" index="1"/>
-                      <column name="path" index="2"/>
-                    </options>
-                </param>
-            </when>
-        </conditional>
+        <expand macro="input_conditional_choose_db_type" />
         <conditional name="id_opts">
             <param name="id_type" type="select" label="Type of identifier list">
               <option value="file">From file</option>
@@ -132,17 +102,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers:
 
-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -1,91 +1,40 @@\n-<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.20">\n+<tool id="ncbi_blastn_wrapper" name="NCBI BLAST+ blastn" version="0.0.22">\n     <description>Search nucleotide database with nucleotide query sequence(s)</description>\n     <!-- If job splitting is enabled, break up the query file into parts -->\n-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>\n-    <requirements>\n-        <requirement type="binary">blastn</requirement>\n-        <requirement type="package" version="2.2.26+">blast+</requirement>\n-    </requirements>\n-    <version_command>blastn -version</version_command>\n+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>\n+    <macros>\n+        <token name="@BINARY@">blastn</token>\n+        <import>ncbi_macros.xml</import>\n+    </macros>\n+    <expand macro="requirements" />\n     <command>\n ## The command is a Cheetah template which allows some Python based syntax.\n ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces\n blastn\n -query "$query"\n-#if $db_opts.db_opts_selector == "db":\n-  -db "${db_opts.database.fields.path}"\n-#elif $db_opts.db_opts_selector == "histdb":\n-  -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n-#else:\n-  -subject "$db_opts.subject"\n-#end if\n+@BLAST_DB_SUBJECT@\n -task $blast_type\n -evalue $evalue_cutoff\n--out "$output1"\n-##Set the extended list here so if/when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n-#else:\n-    -outfmt $out_format\n-#end if\n--num_threads 8\n+@BLAST_OUTPUT@\n+@THREADS@\n #if $adv_opts.adv_opts_selector=="advanced":\n-$adv_opts.filter_query\n $adv_opts.strand\n-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n-## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n--max_target_seqs $adv_opts.max_hits\n-#end if\n+@ADVANCED_OPTIONS@\n #if (str($adv_opts.identity_cutoff) and float(str($adv_opts.identity_cutoff)) > 0 ):\n -perc_identity $adv_opts.identity_cutoff\n #end if\n-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n--word_size $adv_opts.word_size\n-#end if\n $adv_opts.ungapped\n-$adv_opts.parse_deflines\n ## End of advanced options:\n #end if\n     </command>\n-    <stdio>\n-        <!-- Anything other than zero is an error -->\n-        <exit_code range="1:" />\n-        <exit_code range=":-1" />\n-        <!-- In case the return code has not been set propery check stderr too -->\n-        <regex match="Error:" />\n-        <regex match="Exception:" />\n-    </stdio>\n+\n+    <expand macro="stdio" />\n+\n     <inputs>\n         <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> \n-        <conditional name="db_opts">\n-            <param 
name="db_opts_selector" type="select" label="Subject database/sequences">\n-              <option value="db" selected="True">Locally installed BLAST database</option>\n-              <option value="histdb">BLAST database from your history</option>\n-              <option value="file">FASTA file from your history (see warning note below)</option>\n-            </param>\n-            <when value="db">\n-                <param name="database" type="select" label="Nucleotide BLAST database">\n-                    <options from_file="blastdb.loc">\n-                      <column name="value" index="0"/>\n-                      <column name="name" index="1"/>\n-                      <column name="path" index="2"/>\n-                    </options>\n-                </param>\n-                <param name="histdb" type="hidden" value="" />\n-                <param name="subject" type="hidden" value="" /> \n-         '..b"not* advised because it is slower (only one\n-CPU is used), but more importantly gives e-values for pairwise\n-searches (very small e-values which will look overly signficiant).\n-In most cases you should instead turn the other FASTA file into a\n-database first using *makeblastdb* and search against that.\n+@FASTA_WARNING@\n \n -----\n \n-**Output format**\n-\n-Because Galaxy focuses on processing tabular data, the default output of this\n-tool is tabular. 
The standard BLAST+ tabular output contains 12 columns:\n-\n-====== ========= ============================================\n-Column NCBI name Description\n------- --------- --------------------------------------------\n-     1 qseqid    Query Seq-id (ID of your sequence)\n-     2 sseqid    Subject Seq-id (ID of the database hit)\n-     3 pident    Percentage of identical matches\n-     4 length    Alignment length\n-     5 mismatch  Number of mismatches\n-     6 gapopen   Number of gap openings\n-     7 qstart    Start of alignment in query\n-     8 qend      End of alignment in query\n-     9 sstart    Start of alignment in subject (database hit)\n-    10 send      End of alignment in subject (database hit)\n-    11 evalue    Expectation value (E-value)\n-    12 bitscore  Bit score\n-====== ========= ============================================\n-\n-The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. Most (but not all) of these columns are\n-included by selecting the extended tabular output. The extra columns are\n-included *after* the standard 12 columns. This is so that you can write\n-workflow filtering steps that accept either the 12 or 24 column tabular\n-BLAST output. 
Galaxy now uses this extended 24 column output by default.\n-\n-====== ============= ===========================================\n-Column NCBI name     Description\n------- ------------- -------------------------------------------\n-    13 sallseqid     All subject Seq-id(s), separated by a ';'\n-    14 score         Raw score\n-    15 nident        Number of identical matches\n-    16 positive      Number of positive-scoring matches\n-    17 gaps          Total number of gaps\n-    18 ppos          Percentage of positive-scoring matches\n-    19 qframe        Query frame\n-    20 sframe        Subject frame\n-    21 qseq          Aligned part of query sequence\n-    22 sseq          Aligned part of subject sequence\n-    23 qlen          Query sequence length\n-    24 slen          Subject sequence length\n-====== ============= ===========================================\n-\n-The third option is BLAST XML output, which is designed to be parsed by\n-another program, and is understood by some Galaxy tools.\n-\n-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+@OUTPUT_FORMAT@\n \n -------\n \n@@ -252,17 +102,6 @@\n If you use this Galaxy tool in work leading to a scientific publication please\n cite the following papers:\n \n-Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n-Galaxy tools and workflows for sequence analysis with applications\n-in molecular plant pathology. 
PeerJ 1:e167\n-http://dx.doi.org/10.7717/peerj.167\n-\n-Christiam Camacho et al. (2009).\n-BLAST+: architecture and applications.\n-BMC Bioinformatics. 15;10:421.\n-http://dx.doi.org/10.1186/1471-2105-10-421\n-\n-This wrapper is available to install into other Galaxy Instances via the Galaxy\n-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+@REFERENCES@\n     </help>\n </tool>\n"
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -1,156 +1,62 @@\n-<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.20">\n+<tool id="ncbi_blastp_wrapper" name="NCBI BLAST+ blastp" version="0.0.22">\n     <description>Search protein database with protein query sequence(s)</description>\n     <!-- If job splitting is enabled, break up the query file into parts -->\n-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>\n-    <requirements>\n-        <requirement type="binary">blastp</requirement>\n-        <requirement type="package" version="2.2.26+">blast+</requirement>\n-    </requirements>\n-    <version_command>blastp -version</version_command>\n+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />\n+    <macros>\n+        <token name="@BINARY@">blastp</token>\n+        <import>ncbi_macros.xml</import>\n+    </macros>\n+    <expand macro="requirements" />\n     <command>\n ## The command is a Cheetah template which allows some Python based syntax.\n ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces\n blastp\n -query "$query"\n-#if $db_opts.db_opts_selector == "db":\n-  -db "${db_opts.database.fields.path}"\n-#elif $db_opts.db_opts_selector == "histdb":\n-  -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n-#else:\n-  -subject "$db_opts.subject"\n-#end if\n+@BLAST_DB_SUBJECT@\n -task $blast_type\n -evalue $evalue_cutoff\n--out "$output1"\n-##Set the extended list here so if/when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n-#else:\n-    -outfmt $out_format\n-#end if\n--num_threads 8\n+@BLAST_OUTPUT@\n+@THREADS@\n #if $adv_opts.adv_opts_selector=="advanced":\n-$adv_opts.filter_query\n -matrix $adv_opts.matrix\n-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n-## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n--max_target_seqs $adv_opts.max_hits\n-#end if\n-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n--word_size $adv_opts.word_size\n-#end if\n+@ADVANCED_OPTIONS@\n ##Ungapped disabled for now - see comments below\n ##$adv_opts.ungapped\n-$adv_opts.parse_deflines\n ## End of advanced options:\n #end if\n     </command>\n-    <stdio>\n-        <!-- Anything other than zero is an error -->\n-        <exit_code range="1:" />\n-        <exit_code range=":-1" />\n-        <!-- In case the return code has not been set propery check stderr too -->\n-        <regex match="Error:" />\n-        <regex match="Exception:" />\n-    </stdio>\n+\n+    <expand macro="stdio" />\n+\n     <inputs>\n         <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> \n-        <conditional name="db_opts">\n-            <param name="db_opts_selector" type="select" label="Subject database/sequences">\n-          
    <option value="db" selected="True">Locally installed BLAST database</option>\n-              <option value="histdb">BLAST database from your history</option>\n-              <option value="file">FASTA file from your history (see warning note below)</option>\n-            </param>\n-            <when value="db">\n-                <param name="database" type="select" label="Protein BLAST database">\n-                    <options from_file="blastdb_p.loc">\n-                      <column name="value" index="0"/>\n-                      <column name="name" index="1"/>\n-                      <column name="path" index="2"/>\n-                    </options>\n-                </param>\n-                <param name="histdb" type="hidden" value="" />\n-                <param name="subject" type="hidden" value="" /> \n-            </when>\n-            <when value="histdb">\n-                <param name="database" type="hidden'..b"not* advised because it is slower (only one\n-CPU is used), but more importantly gives e-values for pairwise\n-searches (very small e-values which will look overly signficiant).\n-In most cases you should instead turn the other FASTA file into a\n-database first using *makeblastdb* and search against that.\n+@FASTA_WARNING@\n \n -----\n \n-**Output format**\n-\n-Because Galaxy focuses on processing tabular data, the default output of this\n-tool is tabular. 
The standard BLAST+ tabular output contains 12 columns:\n-\n-====== ========= ============================================\n-Column NCBI name Description\n------- --------- --------------------------------------------\n-     1 qseqid    Query Seq-id (ID of your sequence)\n-     2 sseqid    Subject Seq-id (ID of the database hit)\n-     3 pident    Percentage of identical matches\n-     4 length    Alignment length\n-     5 mismatch  Number of mismatches\n-     6 gapopen   Number of gap openings\n-     7 qstart    Start of alignment in query\n-     8 qend      End of alignment in query\n-     9 sstart    Start of alignment in subject (database hit)\n-    10 send      End of alignment in subject (database hit)\n-    11 evalue    Expectation value (E-value)\n-    12 bitscore  Bit score\n-====== ========= ============================================\n-\n-The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. Most (but not all) of these columns are\n-included by selecting the extended tabular output. The extra columns are\n-included *after* the standard 12 columns. This is so that you can write\n-workflow filtering steps that accept either the 12 or 24 column tabular\n-BLAST output. 
Galaxy now uses this extended 24 column output by default.\n-\n-====== ============= ===========================================\n-Column NCBI name     Description\n------- ------------- -------------------------------------------\n-    13 sallseqid     All subject Seq-id(s), separated by a ';'\n-    14 score         Raw score\n-    15 nident        Number of identical matches\n-    16 positive      Number of positive-scoring matches\n-    17 gaps          Total number of gaps\n-    18 ppos          Percentage of positive-scoring matches\n-    19 qframe        Query frame\n-    20 sframe        Subject frame\n-    21 qseq          Aligned part of query sequence\n-    22 sseq          Aligned part of subject sequence\n-    23 qlen          Query sequence length\n-    24 slen          Subject sequence length\n-====== ============= ===========================================\n-\n-The third option is BLAST XML output, which is designed to be parsed by\n-another program, and is understood by some Galaxy tools.\n-\n-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+@OUTPUT_FORMAT@\n \n -------\n \n@@ -301,17 +142,6 @@\n If you use this Galaxy tool in work leading to a scientific publication please\n cite the following papers:\n \n-Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n-Galaxy tools and workflows for sequence analysis with applications\n-in molecular plant pathology. 
PeerJ 1:e167\n-http://dx.doi.org/10.7717/peerj.167\n-\n-Christiam Camacho et al. (2009).\n-BLAST+: architecture and applications.\n-BMC Bioinformatics. 15;10:421.\n-http://dx.doi.org/10.1186/1471-2105-10-421\n-\n-This wrapper is available to install into other Galaxy Instances via the Galaxy\n-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+@REFERENCES@\n     </help>\n </tool>\n"
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -1,173 +1,55 @@\n-<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.19">\n+<tool id="ncbi_blastx_wrapper" name="NCBI BLAST+ blastx" version="0.0.22">\n     <description>Search protein database with translated nucleotide query sequence(s)</description>\n     <!-- If job splitting is enabled, break up the query file into parts -->\n-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>\n-    <requirements>\n-        <requirement type="binary">blastx</requirement>\n-        <requirement type="package" version="2.2.26+">blast+</requirement>\n-    </requirements>\n-    <version_command>blastx -version</version_command>\n+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>\n+    <macros>\n+        <token name="@BINARY@">blastx</token>\n+        <import>ncbi_macros.xml</import>\n+    </macros>\n+    <expand macro="requirements" />\n     <command>\n ## The command is a Cheetah template which allows some Python based syntax.\n ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces\n blastx\n -query "$query"\n-#if $db_opts.db_opts_selector == "db":\n-  -db "${db_opts.database.fields.path}"\n-#elif $db_opts.db_opts_selector == "histdb":\n-  -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n-#else:\n-  -subject "$db_opts.subject"\n-#end if\n+@BLAST_DB_SUBJECT@\n -query_gencode $query_gencode\n -evalue $evalue_cutoff\n--out "$output1"\n-##Set the extended list here so if/when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n-#else:\n-    -outfmt $out_format\n-#end if\n--num_threads 8\n+@BLAST_OUTPUT@\n+@THREADS@\n #if $adv_opts.adv_opts_selector=="advanced":\n-$adv_opts.filter_query\n $adv_opts.strand\n -matrix $adv_opts.matrix\n-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n-## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n--max_target_seqs $adv_opts.max_hits\n-#end if\n-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n--word_size $adv_opts.word_size\n-#end if\n+@ADVANCED_OPTIONS@\n $adv_opts.ungapped\n-$adv_opts.parse_deflines\n ## End of advanced options:\n #end if\n     </command>\n-    <stdio>\n-        <!-- Anything other than zero is an error -->\n-        <exit_code range="1:" />\n-        <exit_code range=":-1" />\n-        <!-- In case the return code has not been set propery check stderr too -->\n-        <regex match="Error:" />\n-        <regex match="Exception:" />\n-    </stdio>\n+\n+    <expand macro="stdio" />\n+\n     <inputs>\n         <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> \n-        <conditional name="db_opts">\n-            <param name="db_opts_selector" type="select" label="Subject database/sequences">\n-              <option 
value="db" selected="True">Locally installed BLAST database</option>\n-              <option value="histdb">BLAST database from your history</option>\n-              <option value="file">FASTA file from your history (see warning note below)</option>\n-            </param>\n-            <when value="db">\n-                <param name="database" type="select" label="Protein BLAST database">\n-                    <options from_file="blastdb_p.loc">\n-                      <column name="value" index="0"/>\n-                      <column name="name" index="1"/>\n-                      <column name="path" index="2"/>\n-                    </options>\n-                </param>\n-                <param name="histdb" type="hidden" value="" />\n-                <param name="subject" type="hidden" value="" />\n-            </when>\n-            <when value="histdb">\n-                <param name="database" type="'..b"ot* advised because it is slower (only one\n-CPU is used), but more importantly gives e-values for pairwise\n-searches (very small e-values which will look overly signficiant).\n-In most cases you should instead turn the other FASTA file into a\n-database first using *makeblastdb* and search against that.\n+@FASTA_WARNING@\n \n -----\n \n-**Output format**\n-\n-Because Galaxy focuses on processing tabular data, the default output of this\n-tool is tabular. 
The standard BLAST+ tabular output contains 12 columns:\n-\n-====== ========= ============================================\n-Column NCBI name Description\n------- --------- --------------------------------------------\n-     1 qseqid    Query Seq-id (ID of your sequence)\n-     2 sseqid    Subject Seq-id (ID of the database hit)\n-     3 pident    Percentage of identical matches\n-     4 length    Alignment length\n-     5 mismatch  Number of mismatches\n-     6 gapopen   Number of gap openings\n-     7 qstart    Start of alignment in query\n-     8 qend      End of alignment in query\n-     9 sstart    Start of alignment in subject (database hit)\n-    10 send      End of alignment in subject (database hit)\n-    11 evalue    Expectation value (E-value)\n-    12 bitscore  Bit score\n-====== ========= ============================================\n-\n-The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. Most (but not all) of these columns are\n-included by selecting the extended tabular output. The extra columns are\n-included *after* the standard 12 columns. This is so that you can write\n-workflow filtering steps that accept either the 12 or 24 column tabular\n-BLAST output. 
Galaxy now uses this extended 24 column output by default.\n-\n-====== ============= ===========================================\n-Column NCBI name     Description\n------- ------------- -------------------------------------------\n-    13 sallseqid     All subject Seq-id(s), separated by a ';'\n-    14 score         Raw score\n-    15 nident        Number of identical matches\n-    16 positive      Number of positive-scoring matches\n-    17 gaps          Total number of gaps\n-    18 ppos          Percentage of positive-scoring matches\n-    19 qframe        Query frame\n-    20 sframe        Subject frame\n-    21 qseq          Aligned part of query sequence\n-    22 sseq          Aligned part of subject sequence\n-    23 qlen          Query sequence length\n-    24 slen          Subject sequence length \n-====== ============= ===========================================\n-\n-The third option is BLAST XML output, which is designed to be parsed by\n-another program, and is understood by some Galaxy tools.\n-\n-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+@OUTPUT_FORMAT@\n \n -------\n \n@@ -289,17 +106,6 @@\n If you use this Galaxy tool in work leading to a scientific publication please\n cite the following papers:\n \n-Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n-Galaxy tools and workflows for sequence analysis with applications\n-in molecular plant pathology. 
PeerJ 1:e167\n-http://dx.doi.org/10.7717/peerj.167\n-\n-Christiam Camacho et al. (2009).\n-BLAST+: architecture and applications.\n-BMC Bioinformatics. 15;10:421.\n-http://dx.doi.org/10.1186/1471-2105-10-421\n-\n-This wrapper is available to install into other Galaxy Instances via the Galaxy\n-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+@REFERENCES@\n     </help>\n </tool>\n"
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Thu Dec 05 06:55:59 2013 -0500
b
@@ -0,0 +1,101 @@
+<tool id="ncbi_dustmasker_wrapper" name="NCBI BLAST+ dustmasker" version="0.0.22">
+    <!-- dustmasker wrapper from Edward Kirton and Nicola Soranzo -->
+    <description>masks low complexity regions</description>
+    <macros>
+        <token name="@BINARY@">dustmasker</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## This is a Cheetah template: lines starting with hash hash are comments, and
+## Galaxy turns newlines into spaces. Input choices are read via the db_opts conditional.
+dustmasker
+#if $db_opts.db_opts_selector == "db":
+  -in "${db_opts.database.fields.path}" -infmt blastdb
+#elif $db_opts.db_opts_selector == "histdb":
+  -in "${os.path.join($db_opts.histdb.extra_files_path, 'blastdb')}" -infmt blastdb
+#else:
+  -in "$db_opts.subject" -infmt fasta
+#end if
+-out "$outfile"
+-window $window -level $level -linker $linker -outfmt $outformat
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <expand macro="input_conditional_nucleotide_db" /> <!-- NOTE(review): presumably supplies the db_opts conditional read by the command template; the macro body lives in ncbi_macros.xml and is not visible here, so confirm -->
+        <param name="window" type="integer" value="64" label="DUST window length" />
+        <param name="level" type="integer" value="20" label="DUST level" help="Score threshold for subwindows" />
+        <param name="linker" type="integer" value="1" label="DUST linker" help="How close masked intervals should be to get merged together" />
+        <param name="outformat" type="select" label="Output format">
+<!-- Why some options below are commented out: acclist and maskinfo_xml are
+     listed as output formats in "dustmasker -help", but were not recognized
+     by NCBI BLAST up to release 2.2.27+ (fixed in BLAST 2.2.28+).
+     The seqloc_* formats are not very useful, so they are not offered. -->
+<!--            <option value="acclist">acclist</option>-->
+            <option value="fasta">FASTA</option>
+            <option value="interval" selected="true">interval</option>
+            <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
+            <option value="maskinfo_asn1_text">maskinfo ASN.1 text</option>
+<!--            <option value="maskinfo_xml">maskinfo_xml</option>
+            <option value="seqloc_asn1_bin">seqloc_asn1_bin</option>
+            <option value="seqloc_asn1_text">seqloc_asn1_text</option>
+            <option value="seqloc_xml">seqloc_xml</option>-->
+        </param>
+    </inputs>
+    <outputs><!-- The datatype follows the chosen outformat; interval is the fallback -->
+        <data label="DUST Masked File" name="outfile" format="interval">
+            <change_format>
+                <when format="fasta" input="outformat" value="fasta" />
+                <when format="maskinfo-asn1-binary" input="outformat" value="maskinfo_asn1_bin" />
+                <when format="maskinfo-asn1" input="outformat" value="maskinfo_asn1_text" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test><!-- FASTA subject file, outformat=fasta -->
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="window" value="64" />
+            <param name="level" value="20" />
+            <param name="linker" value="1" />
+            <param name="outformat" value="fasta" />
+            <output name="outfile" file="dustmasker_three_human.fasta" />
+        </test>
+        <test><!-- same input and settings, outformat=maskinfo_asn1_bin -->
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="window" value="64" />
+            <param name="level" value="20" />
+            <param name="linker" value="1" />
+            <param name="outformat" value="maskinfo_asn1_bin" />
+            <output name="outfile" file="dustmasker_three_human.maskinfo-asn1-binary" />
+        </test>
+        <test><!-- same input and settings, outformat=maskinfo_asn1_text -->
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="window" value="64" />
+            <param name="level" value="20" />
+            <param name="linker" value="1" />
+            <param name="outformat" value="maskinfo_asn1_text" />
+            <output name="outfile" file="dustmasker_three_human.maskinfo-asn1" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+This tool identifies and masks out low complexity regions of a nucleotide database (or sequences in FASTA format) by using the symmetric DUST algorithm.
+
+If you select *maskinfo ASN.1* (binary or text) as output format, the output file can be used as masking data for NCBI BLAST+ makeblastdb tool.
+
+More information about dustmasker can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: http://www.ncbi.nlm.nih.gov/books/NBK1763/
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers (a more specific paper covering this wrapper is planned):
+
+@REFERENCES@
+    </help>
+</tool>
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -0,0 +1,382 @@\n+<macros>\n+    <xml name="output_change_format">\n+        <change_format>\n+            <when input="out_format" value="0" format="txt"/>\n+            <when input="out_format" value="0 -html" format="html"/>\n+            <when input="out_format" value="2" format="txt"/>\n+            <when input="out_format" value="2 -html" format="html"/>\n+            <when input="out_format" value="4" format="txt"/>\n+            <when input="out_format" value="4 -html" format="html"/>\n+            <when input="out_format" value="5" format="blastxml"/>\n+        </change_format>\n+    </xml>\n+    <xml name="input_out_format">\n+        <param name="out_format" type="select" label="Output format">\n+            <option value="6">Tabular (standard 12 columns)</option>\n+            <option value="ext" selected="True">Tabular (extended 25 columns)</option>\n+            <option value="5">BLAST XML</option>\n+            <option value="0">Pairwise text</option>\n+            <option value="0 -html">Pairwise HTML</option>\n+            <option value="2">Query-anchored text</option>\n+            <option value="2 -html">Query-anchored HTML</option>\n+            <option value="4">Flat query-anchored text</option>\n+            <option value="4 -html">Flat query-anchored HTML</option>\n+            <!--\n+            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>\n+            -->\n+        </param>\n+    </xml>\n+    <xml name="input_scoring_matrix">\n+        <param name="matrix" type="select" label="Scoring matrix">\n+            <option value="BLOSUM90">BLOSUM90</option>\n+            <option value="BLOSUM80">BLOSUM80</option>\n+            <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>\n+            <option value="BLOSUM50">BLOSUM50</option> \n+            <option value="BLOSUM45">BLOSUM45</option>\n+            <option value="PAM250">PAM250</option>\n+            <option value="PAM70">PAM70</option>\n+            
<option value="PAM30">PAM30</option>\n+        </param>\n+    </xml>\n+    <xml name="stdio">\n+        <stdio>\n+            <!-- Anything other than zero is an error -->\n+            <exit_code range="1:" />\n+            <exit_code range=":-1" />\n+            <!-- In case the return code has not been set properly check stderr too -->\n+            <regex match="Error:" />\n+            <regex match="Exception:" />\n+        </stdio>\n+    </xml>\n+    <xml name="input_query_gencode">\n+        <param name="query_gencode" type="select" label="Query genetic code">\n+            <!-- See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details -->\n+            <option value="1" selected="True">1. Standard</option>\n+            <option value="2">2. Vertebrate Mitochondrial</option>\n+            <option value="3">3. Yeast Mitochondrial</option>\n+            <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>\n+            <option value="5">5. Invertebrate Mitochondrial</option>\n+            <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option>\n+            <option value="9">9. Echinoderm Mitochondrial</option>\n+            <option value="10">10. Euplotid Nuclear</option>\n+            <option value="11">11. Bacteria and Archaea</option>\n+            <option value="12">12. Alternative Yeast Nuclear</option>\n+            <option value="13">13. Ascidian Mitochondrial</option>\n+            <option value="14">14. Flatworm Mitochondrial</option>\n+            <option value="15">15. Blepharisma Macronuclear</option>\n+            <option value="16">16. Chlorophycean Mitochondrial Code</option>\n+            <option value="21">21. Trematode Mitochondrial Code</option>\n+            <option value="22">22. Scenedesmus obliquus mitochondrial Code</option>\n+            <option value="23">23. 
Thraustochytrium Mitochondrial Code</option>\n+            <option value="24">24. Pterobranchia mitochondrial code</option>\n+        </param>\n+    </xm'..b'ications.\n+BMC Bioinformatics. 15;10:421.\n+http://dx.doi.org/10.1186/1471-2105-10-421\n+\n+This wrapper is available to install into other Galaxy Instances via the Galaxy\n+Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+    </token>\n+    <token name="@OUTPUT_FORMAT@">**Output format**\n+\n+Because Galaxy focuses on processing tabular data, the default output of this\n+tool is tabular. The standard BLAST+ tabular output contains 12 columns:\n+\n+====== ========= ============================================\n+Column NCBI name Description\n+------ --------- --------------------------------------------\n+     1 qseqid    Query Seq-id (ID of your sequence)\n+     2 sseqid    Subject Seq-id (ID of the database hit)\n+     3 pident    Percentage of identical matches\n+     4 length    Alignment length\n+     5 mismatch  Number of mismatches\n+     6 gapopen   Number of gap openings\n+     7 qstart    Start of alignment in query\n+     8 qend      End of alignment in query\n+     9 sstart    Start of alignment in subject (database hit)\n+    10 send      End of alignment in subject (database hit)\n+    11 evalue    Expectation value (E-value)\n+    12 bitscore  Bit score\n+====== ========= ============================================\n+\n+The BLAST+ tools can optionally output additional columns of information,\n+but this takes longer to calculate. Most (but not all) of these columns are\n+included by selecting the extended tabular output. The extra columns are\n+included *after* the standard 12 columns. This is so that you can write\n+workflow filtering steps that accept either the 12 or 25 column tabular\n+BLAST output. 
Galaxy now uses this extended 25 column output by default.\n+\n+====== ============= ===========================================\n+Column NCBI name     Description\n+------ ------------- -------------------------------------------\n+    13 sallseqid     All subject Seq-id(s), separated by \';\'\n+    14 score         Raw score\n+    15 nident        Number of identical matches\n+    16 positive      Number of positive-scoring matches\n+    17 gaps          Total number of gaps\n+    18 ppos          Percentage of positive-scoring matches\n+    19 qframe        Query frame\n+    20 sframe        Subject frame\n+    21 qseq          Aligned part of query sequence\n+    22 sseq          Aligned part of subject sequence\n+    23 qlen          Query sequence length\n+    24 slen          Subject sequence length\n+    25 salltitles    All subject title(s), separated by \'&lt;&gt;\'\n+====== ============= ===========================================\n+\n+The third option is BLAST XML output, which is designed to be parsed by\n+another program, and is understood by some Galaxy tools.\n+\n+You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n+The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n+The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n+The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n+and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+    </token>\n+    <token name="@FASTA_WARNING@">.. class:: warningmark\n+\n+You can also search against a FASTA file of subject (target)\n+sequences. 
This is *not* advised because it is slower (only one\n+CPU is used), but more importantly gives e-values for pairwise\n+searches (very small e-values which will look overly significant).\n+In most cases you should instead turn the other FASTA file into a\n+database first using *makeblastdb* and search against that.\n+    </token>\n+    <token name="@SEARCH_TIME_WARNING@">.. class:: warningmark\n+\n+**Note**. Database searches may take a substantial amount of time.\n+For large input datasets it is advisable to allow overnight processing.  \n+\n+-----\n+    </token>\n+</macros>\n'
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_makeblastdb.xml
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Thu Dec 05 06:55:59 2013 -0500
b
@@ -1,11 +1,17 @@
-<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.5">
+<tool id="ncbi_makeblastdb" name="NCBI BLAST+ makeblastdb" version="0.0.22">
     <description>Make BLAST database</description>
-    <requirements>
-        <requirement type="binary">makeblastdb</requirement>
-        <requirement type="package" version="2.2.26+">blast+</requirement>
-    </requirements>
-    <version_command>makeblastdb -version</version_command>
-    <command>
+    <macros>
+        <token name="@BINARY@">makeblastdb</token>
+        <import>ncbi_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command interpreter="python">check_no_duplicates.py
+##First check for duplicates (since BLAST+ 2.2.28 fails to do so)
+##and abort (via the ampersand ampersand trick) if any are found.
+#for $i in $in
+"${i.file}"
+#end for
+&amp;&amp;
 makeblastdb -out "${os.path.join($outfile.extra_files_path,'blastdb')}"
 $parse_seqids
 $hash_index
@@ -24,54 +30,56 @@
 -title "BLAST Database"
 #end if
 -dbtype $dbtype 
-## #set $sep = '-mask_data '
-## #for $i in $mask_data
-## $sep${i.file}
-## #set $set = ', '
-## #end for 
+## Join all masking files into one "-mask_data a,b,..." argument; each repeat entry holds its dataset in mask_data_file.
+#set $mask_string = ''
+#set $sep = '-mask_data '
+#for $i in $mask_data
+#set $mask_string += $sep + str($i.mask_data_file)
+#set $sep = ','
+#end for
+$mask_string
+## #set $gi_mask_string = ''
 ## #set $sep = '-gi_mask -gi_mask_name '
 ## #for $i in $gi_mask
-## $sep${i.file}
-## #set $set = ', '
-## #end for 
+## #set $gi_mask_string += $sep + str($i.file)
+## #set $sep = ','
+## #end for
+## $gi_mask_string
 ## #if $tax.select == 'id':
 ## -taxid $tax.id
 ## #else if $tax.select == 'map':
 ## -taxid_map $tax.map
 ## #end if
+## --------------------------------------------------------------------
+## Capture the stdout log information to the primary file (plain text):
+&gt;&gt; "$outfile"
     </command>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-        <!-- In case the return code has not been set propery check stderr too -->
-        <regex match="Error:" />
-        <regex match="Exception:" />
-    </stdio>
+    <expand macro="stdio" />
     <inputs>
         <param name="dbtype" type="select" display="radio" label="Molecule type of input">
             <option value="prot">protein</option>
             <option value="nucl">nucleotide</option>
         </param>
         <!-- TODO Allow merging of existing BLAST databases (conditional on the database type)
+             NOTE Double check the new database would be self contained first
         <repeat name="in" title="BLAST or FASTA Database" min="1">
             <param name="file" type="data" format="fasta,blastdbn,blastdbp" label="BLAST or FASTA database" />
         </repeat>
         -->
+        <!-- TODO Switch this to using <param ... multiple="true" /> instead of <repeat> block? -->
         <repeat name="in" title="FASTA file" min="1">
             <param name="file" type="data" format="fasta" />
         </repeat>
         <param name="title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" />
         <param name="parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="False" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" />
-        <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values." help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />
-
+        <param name="hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />
         <!-- SEQUENCE MASKING OPTIONS -->
+        <repeat name="mask_data" title="Masking data file">
+            <param name="mask_data_file" type="data" format="maskinfo-asn1,maskinfo-asn1-binary" label="ASN.1 file containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" />
+        </repeat>
         <!-- TODO
-        <repeat name="mask_data" title="Provide one or more files containing masking data">
-            <param name="file" type="data" format="asnb" label="File containing masking data" help="As produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker)" />
-        </repeat>
         <repeat name="gi_mask" title="Create GI indexed masking data">
-            <param name="file" type="data" format="asnb" label="Masking data output file" />
+            <param name="gi_mask_file" type="data" format="asnb" label="Masking data output file" />
         </repeat>
         -->
 
@@ -104,6 +111,25 @@
         </data>
     </outputs>
     <tests>
+        <!-- Note the (two line) PIN file is not reproducible run to run, hence lines_diff="2" on it.
+             The output is addressed as "outfile", the dataset name the command template writes to. -->
+        <test>
+            <param name="dbtype" value="prot" />
+            <param name="file" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="title" value="Just 4 human proteins" />
+            <param name="parse_seqids" value="" />
+            <param name="hash_index" value="true" />
+            <output name="outfile" file="four_human_proteins.fasta.log" ftype="blastdbp" lines_diff="6">
+                <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />
+                <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />
+                <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />
+                <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" />
+                <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" />
+                <extra_files type="file" value="four_human_proteins.fasta.phi" name="blastdb.phi" />
+                <extra_files type="file" value="four_human_proteins.fasta.psd" name="blastdb.psd" />
+                <extra_files type="file" value="four_human_proteins.fasta.psi" name="blastdb.psi" />
+            </output>
+        </test>
     </tests>
     <help>
 **What it does**
@@ -127,17 +153,6 @@
 If you use this Galaxy tool in work leading to a scientific publication please
 cite the following papers:
 
-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-Christiam Camacho et al. (2009).
-BLAST+: architecture and applications.
-BMC Bioinformatics. 15;10:421.
-http://dx.doi.org/10.1186/1471-2105-10-421
-
-This wrapper is available to install into other Galaxy Instances via the Galaxy
-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+@REFERENCES@
     </help>
 </tool>
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -1,12 +1,12 @@\n-<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.0.4">\n+<tool id="ncbi_rpsblast_wrapper" name="NCBI BLAST+ rpsblast" version="0.0.22">\n     <description>Search protein domain database (PSSMs) with protein query sequence(s)</description>\n     <!-- If job splitting is enabled, break up the query file into parts -->\n-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism>\n-    <requirements>\n-        <requirement type="binary">rpsblast</requirement>\n-        <requirement type="package" version="2.2.26+">blast+</requirement>\n-    </requirements>\n-    <version_command>rpsblast -version</version_command>\n+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />\n+    <macros>\n+        <token name="@BINARY@">rpsblast</token>\n+        <import>ncbi_macros.xml</import>\n+    </macros>\n+    <expand macro="requirements" />\n     <command>\n ## The command is a Cheetah template which allows some Python based syntax.\n ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces\n@@ -18,121 +18,43 @@\n   -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n #end if\n -evalue $evalue_cutoff\n--out "$output1"\n-##Set the extended list here so if/when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n-#else:\n-    -outfmt $out_format\n-#end if\n--num_threads 8\n+@BLAST_OUTPUT@\n+@THREADS@\n #if $adv_opts.adv_opts_selector=="advanced":\n-$adv_opts.filter_query\n-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n-## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n--max_target_seqs $adv_opts.max_hits\n-#end if\n-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n--word_size $adv_opts.word_size\n-#end if\n-$adv_opts.parse_deflines\n+@ADVANCED_OPTIONS@\n ## End of advanced options:\n #end if\n     </command>\n-    <stdio>\n-        <!-- Anything other than zero is an error -->\n-        <exit_code range="1:" />\n-        <exit_code range=":-1" />\n-        <!-- In case the return code has not been set propery check stderr too -->\n-        <regex match="Error:" />\n-        <regex match="Exception:" />\n-    </stdio>\n+\n+    <expand macro="stdio" />\n+\n     <inputs>\n         <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> \n-        <conditional name="db_opts">\n-            <param name="db_opts_selector" type="select" label="Protein domain database (PSSM)">\n-              <option value="db" selected="True">Locally installed BLAST database</option>\n-\t      <!-- TODO - define new datatype\n-              <option value="histdb">BLAST protein domain database from your history</option>\n-\t      -->\n-            </param>\n-            <when value="db">\n-                <param 
name="database" type="select" label="Protein domain database">\n-                    <options from_file="blastdb_d.loc">\n-                      <column name="value" index="0"/>\n-                      <column name="name" index="1"/>\n-                      <column name="path" index="2"/>\n-                    </options>\n-                </param>\n-                <param name="histdb" type="hidden" value="" />\n-                <param name="subject" type="hidden" value="" /> \n-            </when>\n-\t    <!-- TODO - define new datatype\n-            <when value="histdb">\n-                <param name="database" type="hidden" value="" />\n-                <param name="histdb" type="data" format="blastdbd" label="Protein domain database" />\n-                <param name="subject" type="hidden" value="" />\n-            </when>\n-\t    -->\n-        </conditional>\n-        <param name="evalue_cutoff" '..b'output_change_format" />\n+\n         </data>\n     </outputs>\n     <help>\n     \n-.. class:: warningmark\n-\n-**Note**. Database searches may take a substantial amount of time.\n-For large input datasets it is advisable to allow overnight processing.  \n-\n------\n+@SEARCH_TIME_WARNING@\n \n **What it does**\n \n@@ -171,60 +93,7 @@\n \n -----\n \n-**Output format**\n-\n-Because Galaxy focuses on processing tabular data, the default output of this\n-tool is tabular. 
The standard BLAST+ tabular output contains 12 columns:\n-\n-====== ========= ============================================\n-Column NCBI name Description\n------- --------- --------------------------------------------\n-     1 qseqid    Query Seq-id (ID of your sequence)\n-     2 sseqid    Subject Seq-id (ID of the database hit)\n-     3 pident    Percentage of identical matches\n-     4 length    Alignment length\n-     5 mismatch  Number of mismatches\n-     6 gapopen   Number of gap openings\n-     7 qstart    Start of alignment in query\n-     8 qend      End of alignment in query\n-     9 sstart    Start of alignment in subject (database hit)\n-    10 send      End of alignment in subject (database hit)\n-    11 evalue    Expectation value (E-value)\n-    12 bitscore  Bit score\n-====== ========= ============================================\n-\n-The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. Most (but not all) of these columns are\n-included by selecting the extended tabular output. The extra columns are\n-included *after* the standard 12 columns. This is so that you can write\n-workflow filtering steps that accept either the 12 or 24 column tabular\n-BLAST output. 
Galaxy now uses this extended 24 column output by default.\n-\n-====== ============= ===========================================\n-Column NCBI name     Description\n------- ------------- -------------------------------------------\n-    13 sallseqid     All subject Seq-id(s), separated by a \';\'\n-    14 score         Raw score\n-    15 nident        Number of identical matches\n-    16 positive      Number of positive-scoring matches\n-    17 gaps          Total number of gaps\n-    18 ppos          Percentage of positive-scoring matches\n-    19 qframe        Query frame\n-    20 sframe        Subject frame\n-    21 qseq          Aligned part of query sequence\n-    22 sseq          Aligned part of subject sequence\n-    23 qlen          Query sequence length\n-    24 slen          Subject sequence length\n-====== ============= ===========================================\n-\n-The third option is BLAST XML output, which is designed to be parsed by\n-another program, and is understood by some Galaxy tools.\n-\n-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+@OUTPUT_FORMAT@\n \n -------\n \n@@ -233,17 +102,6 @@\n If you use this Galaxy tool in work leading to a scientific publication please\n cite the following papers:\n \n-Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n-Galaxy tools and workflows for sequence analysis with applications\n-in molecular plant pathology. 
PeerJ 1:e167\n-http://dx.doi.org/10.7717/peerj.167\n-\n-Christiam Camacho et al. (2009).\n-BLAST+: architecture and applications.\n-BMC Bioinformatics. 15;10:421.\n-http://dx.doi.org/10.1186/1471-2105-10-421\n-\n-This wrapper is available to install into other Galaxy Instances via the Galaxy\n-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+@REFERENCES@\n     </help>\n </tool>\n'
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -1,12 +1,12 @@\n-<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.0.4">\n+<tool id="ncbi_rpstblastn_wrapper" name="NCBI BLAST+ rpstblastn" version="0.0.22">\n     <description>Search protein domain database (PSSMs) with translated nucleotide query sequence(s)</description>\n     <!-- If job splitting is enabled, break up the query file into parts -->\n-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject" merge_outputs="output1"></parallelism>\n-    <requirements>\n-        <requirement type="binary">rpstblastn</requirement>\n-        <requirement type="package" version="2.2.26+">blast+</requirement>\n-    </requirements>\n-    <version_command>rpstblastn -version</version_command>\n+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>\n+    <macros>\n+        <token name="@BINARY@">rpstblastn</token>\n+        <import>ncbi_macros.xml</import>\n+    </macros>\n+    <expand macro="requirements" />\n     <command>\n ## The command is a Cheetah template which allows some Python based syntax.\n ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces\n@@ -18,122 +18,41 @@\n   -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n #end if\n -evalue $evalue_cutoff\n--out "$output1"\n-## Set the extended list here so if/when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n-#else:\n-    -outfmt $out_format\n-#end if\n+@BLAST_OUTPUT@\n ## rpstblastn does not support multiple threads up to release 2.2.27+. 
Added in BLAST 2.2.28+.\n ##-num_threads 8\n #if $adv_opts.adv_opts_selector=="advanced":\n-$adv_opts.filter_query\n-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n-## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n--max_target_seqs $adv_opts.max_hits\n-#end if\n-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n--word_size $adv_opts.word_size\n-#end if\n-$adv_opts.parse_deflines\n+@ADVANCED_OPTIONS@\n ## End of advanced options:\n #end if\n     </command>\n-    <stdio>\n-        <!-- Anything other than zero is an error -->\n-        <exit_code range="1:" />\n-        <exit_code range=":-1" />\n-        <!-- In case the return code has not been set propery check stderr too -->\n-        <regex match="Error:" />\n-        <regex match="Exception:" />\n-    </stdio>\n+    <expand macro="stdio" />\n     <inputs>\n         <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> \n-        <conditional name="db_opts">\n-            <param name="db_opts_selector" type="select" label="Protein domain database (PSSM)">\n-              <option value="db" selected="True">Locally installed BLAST database</option>\n-              <!-- TODO - define new datatype\n-              <option value="histdb">BLAST protein domain database from your history</option>\n-              -->\n-            </param>\n-            <when value="db">\n-                <param name="database" type="select" label="Protein domain database">\n-                    <options from_file="blastdb_d.loc">\n-                      <column name="value" index="0"/>\n-                      <column name="name" index="1"/>\n-                      <column name="path" index="2"/>\n-                    </options>\n-                </param>\n-                <param name="histdb" type="hidden" value="" />\n-                <param 
name="subject" type="hidden" value="" />\n-            </when>\n-            <!-- TODO - define new datatype\n-            <when value="histdb">\n-                <param name="database" type="hidden" value="" />\n-                <param name="histdb" type="data" format="blastdbd" label="Protein domain database" />\n-           '..b'="output_change_format" />\n         </data>\n     </outputs>\n     <help>\n     \n-.. class:: warningmark\n-\n-**Note**. Database searches may take a substantial amount of time.\n-For large input datasets it is advisable to allow overnight processing.  \n-\n------\n+@SEARCH_TIME_WARNING@\n \n **What it does**\n \n@@ -172,60 +91,7 @@\n \n -----\n \n-**Output format**\n-\n-Because Galaxy focuses on processing tabular data, the default output of this\n-tool is tabular. The standard BLAST+ tabular output contains 12 columns:\n-\n-====== ========= ============================================\n-Column NCBI name Description\n------- --------- --------------------------------------------\n-     1 qseqid    Query Seq-id (ID of your sequence)\n-     2 sseqid    Subject Seq-id (ID of the database hit)\n-     3 pident    Percentage of identical matches\n-     4 length    Alignment length\n-     5 mismatch  Number of mismatches\n-     6 gapopen   Number of gap openings\n-     7 qstart    Start of alignment in query\n-     8 qend      End of alignment in query\n-     9 sstart    Start of alignment in subject (database hit)\n-    10 send      End of alignment in subject (database hit)\n-    11 evalue    Expectation value (E-value)\n-    12 bitscore  Bit score\n-====== ========= ============================================\n-\n-The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. Most (but not all) of these columns are\n-included by selecting the extended tabular output. The extra columns are\n-included *after* the standard 12 columns. 
This is so that you can write\n-workflow filtering steps that accept either the 12 or 24 column tabular\n-BLAST output. Galaxy now uses this extended 24 column output by default.\n-\n-====== ============= ===========================================\n-Column NCBI name     Description\n------- ------------- -------------------------------------------\n-    13 sallseqid     All subject Seq-id(s), separated by a \';\'\n-    14 score         Raw score\n-    15 nident        Number of identical matches\n-    16 positive      Number of positive-scoring matches\n-    17 gaps          Total number of gaps\n-    18 ppos          Percentage of positive-scoring matches\n-    19 qframe        Query frame\n-    20 sframe        Subject frame\n-    21 qseq          Aligned part of query sequence\n-    22 sseq          Aligned part of subject sequence\n-    23 qlen          Query sequence length\n-    24 slen          Subject sequence length\n-====== ============= ===========================================\n-\n-The third option is BLAST XML output, which is designed to be parsed by\n-another program, and is understood by some Galaxy tools.\n-\n-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+@OUTPUT_FORMAT@\n \n -------\n \n@@ -234,17 +100,6 @@\n If you use this Galaxy tool in work leading to a scientific publication please\n cite the following papers:\n \n-Peter J.A. Cock, Bj\xc3\xb6rn A. 
Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n-Galaxy tools and workflows for sequence analysis with applications\n-in molecular plant pathology. PeerJ 1:e167\n-http://dx.doi.org/10.7717/peerj.167\n-\n-Christiam Camacho et al. (2009).\n-BLAST+: architecture and applications.\n-BMC Bioinformatics. 15;10:421.\n-http://dx.doi.org/10.1186/1471-2105-10-421\n-\n-This wrapper is available to install into other Galaxy Instances via the Galaxy\n-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+@REFERENCES@\n     </help>\n </tool>\n'
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -1,173 +1,59 @@\n-<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.20">\n+<tool id="ncbi_tblastn_wrapper" name="NCBI BLAST+ tblastn" version="0.0.22">\n     <description>Search translated nucleotide database with protein query sequence(s)</description>\n     <!-- If job splitting is enabled, break up the query file into parts -->\n-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>\n-    <requirements>\n-        <requirement type="binary">tblastn</requirement>\n-        <requirement type="package" version="2.2.26+">blast+</requirement>\n-    </requirements>\n-    <version_command>tblastn -version</version_command>\n+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>\n+    <macros>\n+        <token name="@BINARY@">tblastn</token>\n+        <import>ncbi_macros.xml</import>\n+    </macros>\n+    <expand macro="requirements" />\n     <command>\n ## The command is a Cheetah template which allows some Python based syntax.\n ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces\n tblastn\n -query "$query"\n-#if $db_opts.db_opts_selector == "db":\n-  -db "${db_opts.database.fields.path}"\n-#elif $db_opts.db_opts_selector == "histdb":\n-  -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n-#else:\n-  -subject "$db_opts.subject"\n-#end if\n+@BLAST_DB_SUBJECT@\n -evalue $evalue_cutoff\n--out "$output1"\n-##Set the extended list here so if/when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n-#else:\n-    -outfmt $out_format\n-#end if\n--num_threads 8\n+@BLAST_OUTPUT@\n+@THREADS@\n #if $adv_opts.adv_opts_selector=="advanced":\n -db_gencode $adv_opts.db_gencode\n-$adv_opts.filter_query\n -matrix $adv_opts.matrix\n-## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n-## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n--max_target_seqs $adv_opts.max_hits\n-#end if\n-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n--word_size $adv_opts.word_size\n-#end if\n+@ADVANCED_OPTIONS@\n ##Ungapped disabled for now - see comments below\n ##$adv_opts.ungapped\n-$adv_opts.parse_deflines\n ## End of advanced options:\n #end if\n     </command>\n-    <stdio>\n-        <!-- Anything other than zero is an error -->\n-        <exit_code range="1:" />\n-        <exit_code range=":-1" />\n-        <!-- In case the return code has not been set propery check stderr too -->\n-        <regex match="Error:" />\n-        <regex match="Exception:" />\n-    </stdio>\n+\n+    <expand macro="stdio" />\n+\n     <inputs>\n         <param name="query" type="data" format="fasta" label="Protein query sequence(s)"/> \n-        <conditional name="db_opts">\n-            <param name="db_opts_selector" type="select" label="Subject 
database/sequences">\n-              <option value="db" selected="True">Locally installed BLAST database</option>\n-              <option value="histdb">BLAST database from your history</option>\n-              <option value="file">FASTA file from your history (see warning note below)</option>\n-            </param>\n-            <when value="db">\n-                <param name="database" type="select" label="Nucleotide BLAST database">\n-                    <options from_file="blastdb.loc">\n-                      <column name="value" index="0"/>\n-                      <column name="name" index="1"/>\n-                      <column name="path" index="2"/>\n-                    </options>\n-                </param>\n-                <param name="histdb" type="hidden" value="" />\n-                <param name="subject" type="hidden" value="" />\n-            </when>\n-            <when value="histdb">\n-  '..b"not* advised because it is slower (only one\n-CPU is used), but more importantly gives e-values for pairwise\n-searches (very small e-values which will look overly signficiant).\n-In most cases you should instead turn the other FASTA file into a\n-database first using *makeblastdb* and search against that.\n+@FASTA_WARNING@\n \n -----\n \n-**Output format**\n-\n-Because Galaxy focuses on processing tabular data, the default output of this\n-tool is tabular. 
The standard BLAST+ tabular output contains 12 columns:\n-\n-====== ========= ============================================\n-Column NCBI name Description\n------- --------- --------------------------------------------\n-     1 qseqid    Query Seq-id (ID of your sequence)\n-     2 sseqid    Subject Seq-id (ID of the database hit)\n-     3 pident    Percentage of identical matches\n-     4 length    Alignment length\n-     5 mismatch  Number of mismatches\n-     6 gapopen   Number of gap openings\n-     7 qstart    Start of alignment in query\n-     8 qend      End of alignment in query\n-     9 sstart    Start of alignment in subject (database hit)\n-    10 send      End of alignment in subject (database hit)\n-    11 evalue    Expectation value (E-value)\n-    12 bitscore  Bit score\n-====== ========= ============================================\n-\n-The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. Most (but not all) of these columns are\n-included by selecting the extended tabular output. The extra columns are\n-included *after* the standard 12 columns. This is so that you can write\n-workflow filtering steps that accept either the 12 or 24 column tabular\n-BLAST output. 
Galaxy now uses this extended 24 column output by default.\n-\n-====== ============= ===========================================\n-Column NCBI name     Description\n------- ------------- -------------------------------------------\n-    13 sallseqid     All subject Seq-id(s), separated by a ';'\n-    14 score         Raw score\n-    15 nident        Number of identical matches\n-    16 positive      Number of positive-scoring matches\n-    17 gaps          Total number of gaps\n-    18 ppos          Percentage of positive-scoring matches\n-    19 qframe        Query frame\n-    20 sframe        Subject frame\n-    21 qseq          Aligned part of query sequence\n-    22 sseq          Aligned part of subject sequence\n-    23 qlen          Query sequence length\n-    24 slen          Subject sequence length\n-====== ============= ===========================================\n-\n-The third option is BLAST XML output, which is designed to be parsed by\n-another program, and is understood by some Galaxy tools.\n-\n-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+@OUTPUT_FORMAT@\n \n -------\n \n@@ -335,17 +156,6 @@\n If you use this Galaxy tool in work leading to a scientific publication please\n cite the following papers:\n \n-Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n-Galaxy tools and workflows for sequence analysis with applications\n-in molecular plant pathology. 
PeerJ 1:e167\n-http://dx.doi.org/10.7717/peerj.167\n-\n-Christiam Camacho et al. (2009).\n-BLAST+: architecture and applications.\n-BMC Bioinformatics. 15;10:421.\n-http://dx.doi.org/10.1186/1471-2105-10-421\n-\n-This wrapper is available to install into other Galaxy Instances via the Galaxy\n-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+@REFERENCES@\n     </help>\n </tool>\n"
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Thu Dec 05 06:55:59 2013 -0500
b
b'@@ -1,193 +1,59 @@\n-<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.20">\n+<tool id="ncbi_tblastx_wrapper" name="NCBI BLAST+ tblastx" version="0.0.22">\n     <description>Search translated nucleotide database with translated nucleotide query sequence(s)</description>\n     <!-- If job splitting is enabled, break up the query file into parts -->\n-    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" shared_inputs="subject,histdb" merge_outputs="output1"></parallelism>\n-    <requirements>\n-        <requirement type="binary">tblastx</requirement>\n-        <requirement type="package" version="2.2.26+">blast+</requirement>\n-    </requirements>\n-    <version_command>tblastx -version</version_command>\n+    <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1"></parallelism>\n+    <macros>\n+        <token name="@BINARY@">tblastx</token>\n+        <import>ncbi_macros.xml</import>\n+    </macros>\n+    <expand macro="requirements" />\n     <command>\n ## The command is a Cheetah template which allows some Python based syntax.\n ## Lines starting hash hash are comments. 
Galaxy will turn newlines into spaces\n tblastx\n -query "$query"\n-#if $db_opts.db_opts_selector == "db":\n-  -db "${db_opts.database.fields.path}"\n-#elif $db_opts.db_opts_selector == "histdb":\n-  -db "${os.path.join($db_opts.histdb.extra_files_path,\'blastdb\')}"\n-#else:\n-  -subject "$db_opts.subject"\n-#end if\n+@BLAST_DB_SUBJECT@\n -query_gencode $query_gencode\n -evalue $evalue_cutoff\n--out "$output1"\n-##Set the extended list here so if/when we add things, saved workflows are not affected\n-#if str($out_format)=="ext":\n-    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen"\n-#else:\n-    -outfmt $out_format\n-#end if\n--num_threads 8\n+@BLAST_OUTPUT@\n+@THREADS@\n #if $adv_opts.adv_opts_selector=="advanced":\n -db_gencode $adv_opts.db_gencode\n-$adv_opts.filter_query\n $adv_opts.strand\n -matrix $adv_opts.matrix\n ## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string\n ## Note -max_target_seqs overrides -num_descriptions and -num_alignments\n-#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):\n--max_target_seqs $adv_opts.max_hits\n-#end if\n-#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) > 0):\n--word_size $adv_opts.word_size\n-#end if\n-$adv_opts.parse_deflines\n+@ADVANCED_OPTIONS@\n ## End of advanced options:\n #end if\n     </command>\n-    <stdio>\n-        <!-- Anything other than zero is an error -->\n-        <exit_code range="1:" />\n-        <exit_code range=":-1" />\n-        <!-- In case the return code has not been set propery check stderr too -->\n-        <regex match="Error:" />\n-        <regex match="Exception:" />\n-    </stdio>\n+\n+    <expand macro="stdio" />\n+\n     <inputs>\n         <param name="query" type="data" format="fasta" label="Nucleotide query sequence(s)"/> \n-        <conditional name="db_opts">\n-            <param name="db_opts_selector" type="select" label="Subject database/sequences">\n-              
<option value="db" selected="True">Locally installed BLAST database</option>\n-              <option value="histdb">BLAST database from your history</option>\n-              <option value="file">FASTA file from your history (see warning note below)</option>\n-            </param>\n-            <when value="db">\n-                <param name="database" type="select" label="Nucleotide BLAST database">\n-                    <options from_file="blastdb.loc">\n-                      <column name="value" index="0"/>\n-                      <column name="name" index="1"/>\n-                      <column name="path" index="2"/>\n-                    </options>\n-                </param>\n-                <param name="histdb" type="hidden" value="" />\n-                <param name="subject" type="hidden" value="" />\n-            </when>\n-            <when value="histdb">\n-        '..b"*not* advised because it is slower (only one\n-CPU is used), but more importantly gives e-values for pairwise\n-searches (very small e-values which will look overly signficiant).\n-In most cases you should instead turn the other FASTA file into a\n-database first using *makeblastdb* and search against that.\n+@FASTA_WARNING@\n \n -----\n \n-**Output format**\n-\n-Because Galaxy focuses on processing tabular data, the default output of this\n-tool is tabular. 
The standard BLAST+ tabular output contains 12 columns:\n-\n-====== ========= ============================================\n-Column NCBI name Description\n------- --------- --------------------------------------------\n-     1 qseqid    Query Seq-id (ID of your sequence)\n-     2 sseqid    Subject Seq-id (ID of the database hit)\n-     3 pident    Percentage of identical matches\n-     4 length    Alignment length\n-     5 mismatch  Number of mismatches\n-     6 gapopen   Number of gap openings\n-     7 qstart    Start of alignment in query\n-     8 qend      End of alignment in query\n-     9 sstart    Start of alignment in subject (database hit)\n-    10 send      End of alignment in subject (database hit)\n-    11 evalue    Expectation value (E-value)\n-    12 bitscore  Bit score\n-====== ========= ============================================\n-\n-The BLAST+ tools can optionally output additional columns of information,\n-but this takes longer to calculate. Most (but not all) of these columns are\n-included by selecting the extended tabular output. The extra columns are\n-included *after* the standard 12 columns. This is so that you can write\n-workflow filtering steps that accept either the 12 or 24 column tabular\n-BLAST output. 
Galaxy now uses this extended 24 column output by default.\n-\n-====== ============= ===========================================\n-Column NCBI name     Description\n------- ------------- -------------------------------------------\n-    13 sallseqid     All subject Seq-id(s), separated by a ';'\n-    14 score         Raw score\n-    15 nident        Number of identical matches\n-    16 positive      Number of positive-scoring matches\n-    17 gaps          Total number of gaps\n-    18 ppos          Percentage of positive-scoring matches\n-    19 qframe        Query frame\n-    20 sframe        Subject frame\n-    21 qseq          Aligned part of query sequence\n-    22 sseq          Aligned part of subject sequence\n-    23 qlen          Query sequence length\n-    24 slen          Subject sequence length\n-====== ============= ===========================================\n-\n-The third option is BLAST XML output, which is designed to be parsed by\n-another program, and is understood by some Galaxy tools.\n-\n-You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).\n-The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.\n-The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n-The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n-and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n+@OUTPUT_FORMAT@\n \n -------\n \n@@ -289,17 +90,6 @@\n If you use this Galaxy tool in work leading to a scientific publication please\n cite the following papers:\n \n-Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n-Galaxy tools and workflows for sequence analysis with applications\n-in molecular plant pathology. 
PeerJ 1:e167\n-http://dx.doi.org/10.7717/peerj.167\n-\n-Christiam Camacho et al. (2009).\n-BLAST+: architecture and applications.\n-BMC Bioinformatics. 15;10:421.\n-http://dx.doi.org/10.1186/1471-2105-10-421\n-\n-This wrapper is available to install into other Galaxy Instances via the Galaxy\n-Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus\n+@REFERENCES@\n     </help>\n </tool>\n"
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/repository_dependencies.xml
--- a/tools/ncbi_blast_plus/repository_dependencies.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/repository_dependencies.xml Thu Dec 05 06:55:59 2013 -0500
b
@@ -1,4 +1,4 @@
 <?xml version="1.0"?>
 <repositories description="This requires the BLAST datatype definitions (e.g. the BLAST XML format).">
-    <repository changeset_revision="b3a3ba0c1d47" name="blast_datatypes" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="a04cf51612f1" name="blast_datatypes" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" />
 </repositories>
b
diff -r 70e7dcbf6573 -r 4c4a0da938ff tools/ncbi_blast_plus/tool_dependencies.xml
--- a/tools/ncbi_blast_plus/tool_dependencies.xml Mon Sep 23 06:14:13 2013 -0400
+++ b/tools/ncbi_blast_plus/tool_dependencies.xml Thu Dec 05 06:55:59 2013 -0500
b
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="blast+" version="2.2.26+">
-        <repository changeset_revision="40c69b76b46e" name="package_blast_plus_2_2_26" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+    <package name="blast+" version="2.2.28">
+        <repository changeset_revision="23b9ba41ad00" name="package_blast_plus_2_2_28" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>