Repository 'ncbi_blast_plus'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus

Changeset 23:31e517610e1f (2018-06-30)
Previous changeset 22:6f386c5dc4fb (2017-09-18) Next changeset 24:c877294f8025 (2018-07-09)
Commit message:
v0.3.0 Updated for NCBI BLAST+ 2.7.1
modified:
test-data/blastn_chimera_vs_rhodopsin_db.tabular
test-data/blastn_chimera_vs_three_human_and_rhodopsin_db.tabular
test-data/blastn_chimera_vs_three_human_max1.txt
test-data/blastn_rhodopsin_vs_three_human.xml
test-data/blastp_four_human_vs_rhodopsin.tabular
test-data/blastp_four_human_vs_rhodopsin.xml
test-data/blastp_four_human_vs_rhodopsin_ext.tabular
test-data/blastx_rhodopsin_vs_four_human.xml
test-data/cd00003_and_cd00008.pin
test-data/four_human_proteins.dbinfo.txt
test-data/four_human_proteins.fasta.pin
test-data/four_human_proteins_taxid.fasta.pin
test-data/rhodopsin_nucs.dbinfo.txt
test-data/rhodopsin_nucs.fasta.nin
test-data/tblastn_four_human_vs_rhodopsin.html
test-data/tblastn_four_human_vs_rhodopsin.xml
test-data/tblastn_four_human_vs_rhodopsin_deflines.tabular
test-data/three_human_mRNA.dbinfo.txt
test-data/three_human_mRNA.fasta.nin
tools/ncbi_blast_plus/README.rst
tools/ncbi_blast_plus/blastxml_to_tabular.py
tools/ncbi_blast_plus/blastxml_to_tabular.xml
tools/ncbi_blast_plus/check_no_duplicates.py
tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
tools/ncbi_blast_plus/ncbi_macros.xml
tools/ncbi_blast_plus/ncbi_makeblastdb.xml
tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml
tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml
tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
added:
test-data/chimera.fasta.gz
test-data/rhodopsin_nucs.fasta.gz
test-data/three_human_mRNA.fasta.gz
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/blastn_chimera_vs_rhodopsin_db.tabular
--- a/test-data/blastn_chimera_vs_rhodopsin_db.tabular Mon Sep 18 06:21:27 2017 -0400
+++ b/test-data/blastn_chimera_vs_rhodopsin_db.tabular Sat Jun 30 17:22:46 2018 -0400
b
@@ -1,8 +1,8 @@
-chimera NM_001009242 92.308 1014 78 0 8881 9894 34 1047 0.0 1441
-chimera GQ290312 91.527 956 81 0 8881 9836 4 959 0.0 1317
-chimera AB062417 87.586 1015 124 2 8881 9894 34 1047 0.0 1175
-chimera GQ290303 91.515 330 28 0 8881 9210 4 333 8.28e-130 455
-chimera GQ290303 91.358 243 19 2 9542 9783 3127 3368 1.46e-92 331
-chimera GQ290303 94.220 173 10 0 9208 9380 1410 1582 1.50e-72 265
-chimera GQ290303 92.941 170 12 0 9375 9544 2854 3023 1.51e-67 248
-chimera GQ290303 95.588 68 3 0 9781 9848 4222 4289 7.43e-26 110
+chimera NM_001009242.1 92.308 1014 78 0 8881 9894 34 1047 0.0 1441
+chimera GQ290312.1 91.527 956 81 0 8881 9836 4 959 0.0 1317
+chimera AB062417.1 87.586 1015 124 2 8881 9894 34 1047 0.0 1175
+chimera GQ290303.1 91.515 330 28 0 8881 9210 4 333 8.28e-130 455
+chimera GQ290303.1 91.358 243 19 2 9542 9783 3127 3368 1.46e-92 331
+chimera GQ290303.1 94.220 173 10 0 9208 9380 1410 1582 1.50e-72 265
+chimera GQ290303.1 92.941 170 12 0 9375 9544 2854 3023 1.51e-67 248
+chimera GQ290303.1 95.588 68 3 0 9781 9848 4222 4289 7.43e-26 110
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/blastn_chimera_vs_three_human_and_rhodopsin_db.tabular
--- a/test-data/blastn_chimera_vs_three_human_and_rhodopsin_db.tabular Mon Sep 18 06:21:27 2017 -0400
+++ b/test-data/blastn_chimera_vs_three_human_and_rhodopsin_db.tabular Sat Jun 30 17:22:46 2018 -0400
b
@@ -1,11 +1,11 @@
 chimera ENA|AB011145|AB011145.1 100.000 4560 0 0 1 4560 121 4680 0.0 8421
 chimera ENA|M10051|M10051.1 99.931 4331 3 0 4560 8890 60 4390 0.0 7982
 chimera ENA|BC112106|BC112106.1 100.000 1093 0 0 8881 9973 121 1213 0.0 2019
-chimera NM_001009242 92.308 1014 78 0 8881 9894 34 1047 0.0 1441
-chimera GQ290312 91.527 956 81 0 8881 9836 4 959 0.0 1317
-chimera AB062417 87.586 1015 124 2 8881 9894 34 1047 0.0 1175
-chimera GQ290303 91.515 330 28 0 8881 9210 4 333 1.70e-129 455
-chimera GQ290303 91.358 243 19 2 9542 9783 3127 3368 2.98e-92 331
-chimera GQ290303 94.220 173 10 0 9208 9380 1410 1582 3.07e-72 265
-chimera GQ290303 92.941 170 12 0 9375 9544 2854 3023 3.09e-67 248
-chimera GQ290303 95.588 68 3 0 9781 9848 4222 4289 1.52e-25 110
+chimera NM_001009242.1 92.308 1014 78 0 8881 9894 34 1047 0.0 1441
+chimera GQ290312.1 91.527 956 81 0 8881 9836 4 959 0.0 1317
+chimera AB062417.1 87.586 1015 124 2 8881 9894 34 1047 0.0 1175
+chimera GQ290303.1 91.515 330 28 0 8881 9210 4 333 1.70e-129 455
+chimera GQ290303.1 91.358 243 19 2 9542 9783 3127 3368 2.98e-92 331
+chimera GQ290303.1 94.220 173 10 0 9208 9380 1410 1582 3.07e-72 265
+chimera GQ290303.1 92.941 170 12 0 9375 9544 2854 3023 3.09e-67 248
+chimera GQ290303.1 95.588 68 3 0 9781 9848 4222 4289 1.52e-25 110
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/blastn_chimera_vs_three_human_max1.txt
--- a/test-data/blastn_chimera_vs_three_human_max1.txt Mon Sep 18 06:21:27 2017 -0400
+++ b/test-data/blastn_chimera_vs_three_human_max1.txt Sat Jun 30 17:22:46 2018 -0400
b
@@ -1,4 +1,4 @@
-BLASTN 2.5.0+
+BLASTN 2.7.1+
 
 
 Reference: Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb
@@ -346,7 +346,7 @@
 
 
   Database: Just 3 human mRNA sequences
-    Posted date:  Dec 2, 2016  10:38 AM
+    Posted date:  Oct 31, 2017  4:23 PM
   Number of letters in database: 10,732
   Number of sequences in database:  3
 
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/blastn_rhodopsin_vs_three_human.xml
--- a/test-data/blastn_rhodopsin_vs_three_human.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/test-data/blastn_rhodopsin_vs_three_human.xml Sat Jun 30 17:22:46 2018 -0400
b
@@ -2,7 +2,7 @@
 <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
 <BlastOutput>
   <BlastOutput_program>blastn</BlastOutput_program>
-  <BlastOutput_version>BLASTN 2.5.0+</BlastOutput_version>
+  <BlastOutput_version>BLASTN 2.7.1+</BlastOutput_version>
   <BlastOutput_reference>Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), &quot;A greedy algorithm for aligning DNA sequences&quot;, J Comput Biol 2000; 7(1-2):203-14.</BlastOutput_reference>
   <BlastOutput_db></BlastOutput_db>
   <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/blastp_four_human_vs_rhodopsin.tabular
--- a/test-data/blastp_four_human_vs_rhodopsin.tabular Mon Sep 18 06:21:27 2017 -0400
+++ b/test-data/blastp_four_human_vs_rhodopsin.tabular Sat Jun 30 17:22:46 2018 -0400
b
@@ -1,6 +1,6 @@
-P08100 NP_001009242 96.552 348 12 0 1 348 1 348 0.0 701
+P08100 NP_001009242.1 96.552 348 12 0 1 348 1 348 0.0 701
 P08100 0811197A 93.103 348 23 1 1 348 1 347 0.0 673
-P08100 ADB45242 94.817 328 17 0 11 338 1 328 0.0 653
-P08100 ADB45229 94.817 328 17 0 11 338 1 328 0.0 631
-P08100 P56514 84.795 342 51 1 1 341 1 342 0.0 619
-P08100 BAB21486 82.164 342 60 1 1 341 1 342 0.0 599
+P08100 ADB45242.1 94.817 328 17 0 11 338 1 328 0.0 653
+P08100 ADB45229.1 94.817 328 17 0 11 338 1 328 0.0 631
+P08100 P56514.1 84.795 342 51 1 1 341 1 342 0.0 619
+P08100 BAB21486.1 82.164 342 60 1 1 341 1 342 0.0 599
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/blastp_four_human_vs_rhodopsin.xml
--- a/test-data/blastp_four_human_vs_rhodopsin.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/test-data/blastp_four_human_vs_rhodopsin.xml Sat Jun 30 17:22:46 2018 -0400
b
@@ -2,7 +2,7 @@
 <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
 <BlastOutput>
   <BlastOutput_program>blastp</BlastOutput_program>
-  <BlastOutput_version>BLASTP 2.5.0+</BlastOutput_version>
+  <BlastOutput_version>BLASTP 2.7.1+</BlastOutput_version>
   <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
   <BlastOutput_db></BlastOutput_db>
   <BlastOutput_query-ID>Q9BS26</BlastOutput_query-ID>
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/blastp_four_human_vs_rhodopsin_ext.tabular
--- a/test-data/blastp_four_human_vs_rhodopsin_ext.tabular Mon Sep 18 06:21:27 2017 -0400
+++ b/test-data/blastp_four_human_vs_rhodopsin_ext.tabular Sat Jun 30 17:22:46 2018 -0400
b
b'@@ -1,6 +1,6 @@\n-P08100\tNP_001009242\t96.552\t348\t12\t0\t1\t348\t1\t348\t0.0\t701\tgi|57163783|ref|NP_001009242.1|\t1808\t336\t343\t0\t98.56\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\t348\t348\tN/A\n+P08100\tNP_001009242.1\t96.552\t348\t12\t0\t1\t348\t1\t348\t0.0\t701\tgi|57163783|ref|NP_001009242.1|\t1808\t336\t343\t0\t98.56\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA\t348\t348\tN/A\n 
P08100\t0811197A\t93.103\t348\t23\t1\t1\t348\t1\t347\t0.0\t673\tgi|223523|prf||0811197A\t1736\t324\t336\t1\t96.55\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA\tMNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA\t348\t347\tN/A\n-P08100\tADB45242\t94.817\t328\t17\t0\t11\t338\t1\t328\t0.0\t653\tgi|283855846|gb|ADB45242.1|\t1684\t311\t321\t0\t97.87\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\tN/A\n-P08100\tADB45229\t94.817\t328\t17\t0\t11\t338\t1\t328\t0.0\t631\tgi|283855823|gb|ADB45229.1|\t1627\t311\t323\t0\t98.48\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQK
AEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCML'..b'SAATSKTE\t348\t354\tN/A\n-P08100\tBAB21486\t82.164\t342\t60\t1\t1\t341\t1\t342\t0.0\t599\tgi|12583665|dbj|BAB21486.1|\t1544\t281\t314\t1\t91.81\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE\t348\t354\tN/A\n+P08100\tADB45242.1\t94.817\t328\t17\t0\t11\t338\t1\t328\t0.0\t653\tgi|283855846|gb|ADB45242.1|\t1684\t311\t321\t0\t97.87\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMT
LPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\tN/A\n+P08100\tADB45229.1\t94.817\t328\t17\t0\t11\t338\t1\t328\t0.0\t631\tgi|283855823|gb|ADB45229.1|\t1627\t311\t323\t0\t98.48\t1\t1\tVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS\tVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS\t348\t328\tN/A\n+P08100\tP56514.1\t84.795\t342\t51\t1\t1\t341\t1\t342\t0.0\t619\tgi|3024260|sp|P56514.1|OPSD_BUFBU\t1595\t290\t322\t1\t94.15\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE\tMNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE\t348\t354\tN/A\n+P08100\tBAB21486.1\t82.164\t342\t60\t1\t1\t341\t1\t342\t0.0\t599\tgi|12583665|dbj|BAB21486.1|\t1544\t281\t314\t1\t91.81\t1\t1\tMNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALAC
AAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE\tMNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE\t348\t354\tN/A\n'
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/blastx_rhodopsin_vs_four_human.xml
--- a/test-data/blastx_rhodopsin_vs_four_human.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/test-data/blastx_rhodopsin_vs_four_human.xml Sat Jun 30 17:22:46 2018 -0400
b
@@ -2,7 +2,7 @@
 <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
 <BlastOutput>
   <BlastOutput_program>blastx</BlastOutput_program>
-  <BlastOutput_version>BLASTX 2.5.0+</BlastOutput_version>
+  <BlastOutput_version>BLASTX 2.7.1+</BlastOutput_version>
   <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
   <BlastOutput_db></BlastOutput_db>
   <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/cd00003_and_cd00008.pin
b
Binary file test-data/cd00003_and_cd00008.pin has changed
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/chimera.fasta.gz
b
Binary file test-data/chimera.fasta.gz has changed
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/four_human_proteins.dbinfo.txt
--- a/test-data/four_human_proteins.dbinfo.txt Mon Sep 18 06:21:27 2017 -0400
+++ b/test-data/four_human_proteins.dbinfo.txt Sat Jun 30 17:22:46 2018 -0400
b
@@ -1,7 +1,7 @@
 Database: Just 4 human proteins
  4 sequences; 3,297 total residues
 
-Date: Dec 2, 2016  10:38 AM Longest sequence: 1,382 residues
+Date: Oct 31, 2017  4:23 PM Longest sequence: 1,382 residues
 
 Volumes:
- /mnt/shared/users/pc40583/repositories/galaxy_blast/test-data/four_human_proteins_taxid.fasta
+ /mnt/galaxy/repositories/galaxy_blast/test-data/four_human_proteins_taxid.fasta
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/four_human_proteins.fasta.pin
b
Binary file test-data/four_human_proteins.fasta.pin has changed
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/four_human_proteins_taxid.fasta.pin
b
Binary file test-data/four_human_proteins_taxid.fasta.pin has changed
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/rhodopsin_nucs.dbinfo.txt
--- a/test-data/rhodopsin_nucs.dbinfo.txt Mon Sep 18 06:21:27 2017 -0400
+++ b/test-data/rhodopsin_nucs.dbinfo.txt Sat Jun 30 17:22:46 2018 -0400
b
@@ -1,7 +1,7 @@
 Database: Rhodopsin nucleotides
  6 sequences; 10,296 total bases
 
-Date: Dec 2, 2016  10:38 AM Longest sequence: 4,301 bases
+Date: Oct 31, 2017  4:23 PM Longest sequence: 4,301 bases
 
 Volumes:
- /mnt/shared/users/pc40583/repositories/galaxy_blast/test-data/rhodopsin_nucs.fasta
+ /mnt/galaxy/repositories/galaxy_blast/test-data/rhodopsin_nucs.fasta
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/rhodopsin_nucs.fasta.gz
b
Binary file test-data/rhodopsin_nucs.fasta.gz has changed
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/rhodopsin_nucs.fasta.nin
b
Binary file test-data/rhodopsin_nucs.fasta.nin has changed
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/tblastn_four_human_vs_rhodopsin.html
--- a/test-data/tblastn_four_human_vs_rhodopsin.html Mon Sep 18 06:21:27 2017 -0400
+++ b/test-data/tblastn_four_human_vs_rhodopsin.html Sat Jun 30 17:22:46 2018 -0400
b
@@ -1,9 +1,9 @@
 <HTML>
-<TITLE>BLAST Search Results</TITLE>
+<HEAD><TITLE>BLAST Search Results</TITLE></HEAD>
 <BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099">
 <PRE>
 
-<b>TBLASTN 2.5.0+</b>
+<b>TBLASTN 2.7.1+</b>
 
 
 <b><a
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/tblastn_four_human_vs_rhodopsin.xml
--- a/test-data/tblastn_four_human_vs_rhodopsin.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/test-data/tblastn_four_human_vs_rhodopsin.xml Sat Jun 30 17:22:46 2018 -0400
b
@@ -2,7 +2,7 @@
 <!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
 <BlastOutput>
   <BlastOutput_program>tblastn</BlastOutput_program>
-  <BlastOutput_version>TBLASTN 2.5.0+</BlastOutput_version>
+  <BlastOutput_version>TBLASTN 2.7.1+</BlastOutput_version>
   <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
   <BlastOutput_db></BlastOutput_db>
   <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/tblastn_four_human_vs_rhodopsin_deflines.tabular
--- a/test-data/tblastn_four_human_vs_rhodopsin_deflines.tabular Mon Sep 18 06:21:27 2017 -0400
+++ b/test-data/tblastn_four_human_vs_rhodopsin_deflines.tabular Sat Jun 30 17:22:46 2018 -0400
b
@@ -1,10 +1,10 @@
-P08100 NM_001009242 96.552 348 12 0 1 348 1 1044 0.0 732
-P08100 AB062417 93.391 348 23 0 1 348 1 1044 0.0 711
-P08100 GQ290312 95.092 326 16 0 11 336 1 978 0.0 658
-P08100 U59921 84.795 342 51 1 1 341 42 1067 0.0 646
-P08100 AB043817 82.164 342 60 1 1 341 23 1048 0.0 626
-P08100 GQ290303 93.243 74 5 0 239 312 3147 3368 1.34e-71 151
-P08100 GQ290303 91.525 59 5 0 177 235 2855 3031 1.34e-71 126
-P08100 GQ290303 96.396 111 4 0 11 121 1 333 3.31e-67 229
-P08100 GQ290303 93.220 59 4 0 119 177 1404 1580 2.31e-32 122
-P08100 GQ290303 88.462 26 3 0 312 337 4222 4299 3.61e-12 57.7
+P08100 NM_001009242.1 96.552 348 12 0 1 348 1 1044 0.0 732
+P08100 AB062417.1 93.391 348 23 0 1 348 1 1044 0.0 711
+P08100 GQ290312.1 95.092 326 16 0 11 336 1 978 0.0 658
+P08100 U59921.1 84.795 342 51 1 1 341 42 1067 0.0 646
+P08100 AB043817.1 82.164 342 60 1 1 341 23 1048 0.0 626
+P08100 GQ290303.1 93.243 74 5 0 239 312 3147 3368 1.34e-71 151
+P08100 GQ290303.1 91.525 59 5 0 177 235 2855 3031 1.34e-71 126
+P08100 GQ290303.1 96.396 111 4 0 11 121 1 333 3.31e-67 229
+P08100 GQ290303.1 93.220 59 4 0 119 177 1404 1580 2.31e-32 122
+P08100 GQ290303.1 88.462 26 3 0 312 337 4222 4299 3.61e-12 57.7
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/three_human_mRNA.dbinfo.txt
--- a/test-data/three_human_mRNA.dbinfo.txt Mon Sep 18 06:21:27 2017 -0400
+++ b/test-data/three_human_mRNA.dbinfo.txt Sat Jun 30 17:22:46 2018 -0400
b
@@ -1,7 +1,7 @@
 Database: Just 3 human mRNA sequences
  3 sequences; 10,732 total bases
 
-Date: Dec 2, 2016  10:38 AM Longest sequence: 4,796 bases
+Date: Oct 31, 2017  4:23 PM Longest sequence: 4,796 bases
 
 Volumes:
- /mnt/shared/users/pc40583/repositories/galaxy_blast/test-data/three_human_mRNA.fasta
+ /mnt/galaxy/repositories/galaxy_blast/test-data/three_human_mRNA.fasta
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/three_human_mRNA.fasta.gz
b
Binary file test-data/three_human_mRNA.fasta.gz has changed
b
diff -r 6f386c5dc4fb -r 31e517610e1f test-data/three_human_mRNA.fasta.nin
b
Binary file test-data/three_human_mRNA.fasta.nin has changed
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/README.rst
--- a/tools/ncbi_blast_plus/README.rst Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/README.rst Sat Jun 30 17:22:46 2018 -0400
b
@@ -1,10 +1,9 @@
 Galaxy wrappers for NCBI BLAST+ suite
 =====================================
 
-These wrappers are copyright 2010-2017 by Peter Cock (The James Hutton Institute,
-UK) and additional contributors including Edward Kirton, John Chilton,
-Nicola Soranzo, Jim Johnson, and Bjoern Gruening.
-
+These wrappers are copyright 2010-2018 by Peter Cock (James Hutton Institute,
+UK) and additional contributors including Edward Kirton, John Chilton, Nicola
+Soranzo, Jim Johnson, Bjoern Gruening, Caleb Easterly, and Anton Nekrutenko.
 See the licence text below.
 
 Note this does not work with the NCBI 'legacy' BLAST suite written in C
@@ -31,13 +30,13 @@
 
 NCBI BLAST+ integrated into Galaxy.
 P.J.A. Cock, J.M. Chilton, B. Gruening, J.E. Johnson, N. Soranzo
-GigaScience, 2015, 4:39 http://dx.doi.org/10.1186/s13742-015-0080-7
+GigaScience, 2015, 4:39 https://doi.org/10.1186/s13742-015-0080-7
 
 You should also cite the NCBI BLAST+ tools:
 
 BLAST+: architecture and applications.
 C. Camacho et al. BMC Bioinformatics 2009, 10:421.
-DOI: http://dx.doi.org/10.1186/1471-2105-10-421
+https://doi.org/10.1186/1471-2105-10-421
 
 
 Automated Installation
@@ -248,6 +247,18 @@
           parameter (contribution from Nicola Soranzo).
         - Add ``-max_hsps`` option (contribution from Nicola Soranzo).
         - Add ``-use_sw_tback`` option for BLASTP (Nicola Soranzo).
+v0.2.02 - Document the BLAST+ 2.5.0 change in the standard 12 column output
+          from ``qseqid,sseqid,...`` to ``qacc,sacc,...`` instead.
+        - Support for per-matrix recommended gaps settings (``-gapopen`` and
+          ``-gapextend``, contribution from Caleb Easterly and Jim Johnson).
+        - Support for ``-window_size``, ``-threshold``, ``-comp_based_stats``
+          and revising ``-word_size`` to avoid using zero to mean default
+          (contribution from Caleb Easterly).
+v0.3.0  - Updated for NCBI BLAST+ 2.7.1.
+        - Depends on BioConda or legacy ToolShed ``package_blast_plus_2_7_1``.
+        - Document the BLAST+ 2.6.0 change in the standard 12 column output
+          from ``qacc,sacc,...`` to ``qaccver,saccver,...`` instead.
+        - Accept gzipped FASTA inputs (contribution from Anton Nekrutenko).
 ======= ======================================================================
 
 
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/blastxml_to_tabular.py
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Sat Jun 30 17:22:46 2018 -0400
b
@@ -108,6 +108,9 @@
 The columns option can be 'std' (standard 12 columns), 'ext'
 (extended 25 columns), or a list of BLAST+ column names like
 'qseqid,sseqid,pident' (space or comma separated).
+
+Note if using a list of column names, currently ONLY the 25
+extended column names are supported.
 """
 parser = OptionParser(usage=usage)
 parser.add_option('-o', '--output', dest='output', default=None,
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/blastxml_to_tabular.xml
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.xml Sat Jun 30 17:22:46 2018 -0400
b
@@ -200,7 +200,7 @@
 Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
 Galaxy tools and workflows for sequence analysis with applications
 in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
+https://doi.org/10.7717/peerj.167
 
 This wrapper is available to install into other Galaxy Instances via the Galaxy
 Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/check_no_duplicates.py
--- a/tools/ncbi_blast_plus/check_no_duplicates.py Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/check_no_duplicates.py Sat Jun 30 17:22:46 2018 -0400
[
@@ -9,10 +9,11 @@
 will return a non-zero error if any duplicate identifiers
 are found.
 """
-
+import gzip
 import os
 import sys
 
+
 if "-v" in sys.argv or "--version" in sys.argv:
     print("v0.0.23")
     sys.exit(0)
@@ -24,7 +25,19 @@
         sys.stderr.write("Missing FASTA file %r\n" % filename)
         sys.exit(2)
     files += 1
-    handle = open(filename)
+
+    with open(filename, "rb") as binary_handle:
+        magic = binary_handle.read(2)
+    if not magic:
+        # Empty file, special case
+        continue
+    elif magic == b'\x1f\x8b':
+        # Gzipped
+        handle = gzip.open(filename, "rt")
+    elif magic[0:1] == b">":
+        # Not gzipped, should be plain FASTA
+        handle = open(filename, "r")
+
     for line in handle:
         if line.startswith(">"):
             # The split will also take care of the new line character,
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Sat Jun 30 17:22:46 2018 -0400
b
@@ -131,6 +131,10 @@
 
 -------
 
+@CLI_OPTIONS
+
+-------
+
 **References**
 
 If you use this Galaxy tool in work leading to a scientific publication please
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastn_wrapper.xml Sat Jun 30 17:22:46 2018 -0400
[
@@ -7,31 +7,43 @@
     <expand macro="parallelism" />
     <expand macro="preamble" />
     <command detect_errors="aggressive">
+<![CDATA[
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
 blastn
--query '$query'
+#if $query.is_of_type('fasta.gz'):
+-query <(gunzip -c '${query}')
+#else:
+-query '${query}'
+#end if
 @BLAST_DB_SUBJECT@
--task $blast_type
--evalue $evalue_cutoff
+-task '${blast_type}'
+-evalue '${evalue_cutoff}'
 @BLAST_OUTPUT@
 @THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
-$adv_opts.strand
+${adv_opts.strand}
 @ADV_FILTER_QUERY@
 @ADV_MAX_HITS@
 @ADV_WORD_SIZE@
 #if (str($adv_opts.identity_cutoff) and float(str($adv_opts.identity_cutoff)) > 0 ):
--perc_identity $adv_opts.identity_cutoff
+-perc_identity '${adv_opts.identity_cutoff}'
 #end if
-$adv_opts.ungapped
+${adv_opts.ungapped}
 @ADV_ID_LIST_FILTER@
 @ADV_QCOV_HSP_PERC@
+## only use window size if dc-megablast mode is used
+#if ($blast_type == "dc-megablast"):
+-window_size @ADV_WINDOW_SIZE@
+#end if
+@ADV_GAPOPEN@
+@ADV_GAPEXTEND@
 ## End of advanced options:
 #end if
+]]>
     </command>
     <inputs>
-        <param argument="-query" type="data" format="fasta" label="Nucleotide query sequence(s)"/>
+        <param argument="-query" type="data" format="fasta,fasta.gz" label="Nucleotide query sequence(s)"/>
         <expand macro="input_conditional_nucleotide_db" />
         <param name="blast_type" argument="-task" type="select" display="radio" label="Type of BLAST">
             <option value="megablast">megablast - Traditional megablast used to find very similar (e.g., intraspecies or closely related species) sequences</option>
@@ -53,13 +65,18 @@
             <expand macro="input_strand" />
             <expand macro="input_max_hits" />
             <param name="identity_cutoff" argument="-perc_identity" type="float" min="0" max="100" value="0" label="Percent identity cutoff" help="Use zero for no cutoff" />
-
-            <!-- I'd like word_size to be optional, with minimum 4 for blastn -->
-            <param argument="-word_size" type="integer" min="0" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 4" />
+            <expand macro="input_word_size" />
             <param argument="-ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
             <expand macro="input_parse_deflines" />
             <expand macro="advanced_optional_id_files" />
             <expand macro="input_qcov_hsp_perc" />
+            <!-- the help text here is unique to the blastn application, so macro not used -->
+            <param name="window_size" type="integer" optional="true" min="0"
+                label="Multiple hits window size: use 0 to specify 1-hit algorithm, leave blank for default"
+                help="Only relevant for dc-megablast, and otherwise ignored. Default window size changes with substitution matrix and BLAST type.
+                    Entering a non-negative integer will override the default."/>
+            <expand macro="input_gapopen"/>
+            <expand macro="input_gapextend"/>
         </expand>
     </inputs>
     <outputs>
@@ -91,6 +108,16 @@
         <test>
             <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
             <param name="db_opts_selector" value="file" />
+            <param name="subject" value="three_human_mRNA.fasta.gz" ftype="fasta.gz" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-40" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastn_rhodopsin_vs_three_human.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
             <param name="subject" value="three_human_mRNA.fasta" ftype="fasta" />
             <param name="database" value="" />
             <param name="evalue_cutoff" value="1e-40" />
@@ -168,6 +195,10 @@
 
 -------
 
+@CLI_OPTIONS@
+
+-------
+
 **References**
 
 If you use this Galaxy tool in work leading to a scientific publication please
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastp_wrapper.xml Sat Jun 30 17:22:46 2018 -0400
b
@@ -17,7 +17,7 @@
 @BLAST_OUTPUT@
 @THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
-    -matrix $adv_opts.matrix
+    @ADV_MATRIX_GAPCOSTS@
     @ADV_FILTER_QUERY@
     @ADV_MAX_HITS@
     @ADV_WORD_SIZE@
@@ -26,6 +26,10 @@
     @ADV_ID_LIST_FILTER@
     @ADV_QCOV_HSP_PERC@
     $adv_opts.use_sw_tback
+    @ADV_WINDOW_SIZE@
+    @ADV_THRESHOLD@
+    @ADV_COMP_BASED_STATS@
+## End of advanced options:
 #end if
     </command>
     <inputs>
@@ -33,19 +37,24 @@
 
         <expand macro="input_conditional_protein_db" />
 
-        <param name="blast_type" argument="-task" type="select" display="radio" label="Type of BLAST">
+        <param name="blast_type" argument="-task" type="select" display="radio"
+            label="Type of BLAST"
+            help="See help text for default parameter values for each BLAST type.">
             <option value="blastp">blastp - Traditional BLASTP to compare a protein query to a protein database</option>
+            <option value="blastp-short">blastp-short - BLASTP optimized for queries shorter than 30 residues</option>
             <option value="blastp-fast">blastp-fast - Use longer words for seeding, faster but less accurate</option>
-            <option value="blastp-short">blastp-short - BLASTP optimized for queries shorter than 30 residues</option>
         </param>
         <expand macro="input_evalue" />
         <expand macro="input_out_format" />
         <expand macro="advanced_options">
             <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
             <expand macro="input_filter_query_default_false" />
-            <expand macro="input_scoring_matrix" />
+            <expand macro="input_matrix_gapcosts" />
             <expand macro="input_max_hits" />
             <expand macro="input_word_size" />
+            <expand macro="input_window_size" />
+            <expand macro="input_threshold" />
+            <expand macro="input_comp_based_stats" />
             <!--
             Can't use '-ungapped' on its own, error back is:
             Composition-adjusted searched are not supported with an ungapped search, please add -comp_based_stats F or do a gapped search
@@ -76,7 +85,8 @@
             <param name="filter_query" value="false" />
             <param name="matrix" value="BLOSUM62" />
             <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
+            <param name="word_size" value="" />
+            <param name="window_size" value="40" />
             <param name="parse_deflines" value="true" />
             <param name="qcov_hsp_perc" value="25" />
             <output name="output1" file="blastp_four_human_vs_rhodopsin.xml" ftype="blastxml" />
@@ -93,7 +103,8 @@
             <param name="filter_query" value="false" />
             <param name="matrix" value="BLOSUM62" />
             <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
+            <param name="word_size" value="" />
+            <param name="window_size" value="40" />
             <param name="parse_deflines" value="true" />
             <param name="qcov_hsp_perc" value="25" />
             <output name="output1" file="blastp_four_human_vs_rhodopsin.tabular" ftype="tabular" />
@@ -110,7 +121,7 @@
             <param name="filter_query" value="false" />
             <param name="matrix" value="BLOSUM62" />
             <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
+            <param name="word_size" value="" />
             <param name="parse_deflines" value="true" />
             <param name="qcov_hsp_perc" value="25" />
             <output name="output1" file="blastp_four_human_vs_rhodopsin_ext.tabular" ftype="tabular" />
@@ -126,6 +137,32 @@
             <param name="adv_opts_selector" value="basic" />
             <output name="output1" file="blastp_rhodopsin_vs_four_human.tabular" ftype="tabular" />
         </test>
+        <test>
+            <param name="query" value="rhodopsin_peptides.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="200000" />
+            <param name="blast_type" value="blastp-short" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="basic" />
+            <output name="output1" file="blastp_rhodopsin_peptides_vs_four_human.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="query" value="rhodopsin_proteins.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-8"/>
+            <param name="blast_type" value="blastp" />
+            <param name="out_format" value="6"/>
+            <param name="adv_opts_selector" value="advanced"/>
+            <param name="window_size" value="35" />
+            <param name="matrix" value="BLOSUM62" />
+            <param name="gap_costs" value="-gapopen 11 -gapextend 1"/>
+            <param name="comp_based_stats" value="3" />
+            <output name="output1" file="blastp_rhodopsin_adv_vs_four_human.tabular" ftype="tabular" />
+        </test>
     </tests>
     <help>
 
@@ -138,10 +175,14 @@
 
 @FASTA_WARNING@
 
------
+-------
 
 @OUTPUT_FORMAT@
 
+-------
+
+@CLI_OPTIONS@
+
 -------
 
 **References**
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastx_wrapper.xml Sat Jun 30 17:22:46 2018 -0400
b
@@ -19,13 +19,16 @@
 @THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
 $adv_opts.strand
--matrix $adv_opts.matrix
+@ADV_MATRIX_GAPCOSTS@
 @ADV_FILTER_QUERY@
 @ADV_MAX_HITS@
 @ADV_WORD_SIZE@
 $adv_opts.ungapped
 @ADV_ID_LIST_FILTER@
 @ADV_QCOV_HSP_PERC@
+@ADV_WINDOW_SIZE@
+@ADV_THRESHOLD@
+@ADV_COMP_BASED_STATS@
 ## End of advanced options:
 #end if
     </command>
@@ -45,13 +48,16 @@
             <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
             <expand macro="input_filter_query_default_true" />
             <expand macro="input_strand" />
-            <expand macro="input_scoring_matrix" />
+            <expand macro="input_matrix_gapcosts" />
             <expand macro="input_max_hits" />
             <expand macro="input_word_size" />
             <param argument="-ungapped" type="boolean" label="Perform ungapped alignment only?" truevalue="-ungapped" falsevalue="" checked="false" />
             <expand macro="input_parse_deflines" />
             <expand macro="advanced_optional_id_files" />
             <expand macro="input_qcov_hsp_perc" />
+            <expand macro="input_window_size" />
+            <expand macro="input_threshold" />
+            <expand macro="input_comp_based_stats" />
         </expand>
     </inputs>
     <outputs>
@@ -105,6 +111,17 @@
             <param name="adv_opts_selector" value="basic" />
             <output name="output1" file="blastx_rhodopsin_vs_four_human_all.tabular" ftype="tabular" />
         </test>
+        <test>
+            <param name="query" value="rhodopsin_nucs.fasta" ftype="fasta" />
+            <param name="db_opts_selector" value="file" />
+            <param name="subject" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="database" value="" />
+            <param name="evalue_cutoff" value="1e-10" />
+            <param name="out_format" value="6" />
+            <param name="adv_opts_selector" value="advanced" />
+            <param name="matrix" value="BLOSUM62" />
+            <output name="output1" file="blastx_rhodopsin_adv_vs_four_human.tabular" ftype="tabular" />
+        </test>
     </tests>
     <help>
 
@@ -117,12 +134,16 @@
 
 @FASTA_WARNING@
 
------
+-------
 
 @OUTPUT_FORMAT@
 
 -------
 
+@CLI_OPTIONS@
+
+-------
+
 **References**
 
 If you use this Galaxy tool in work leading to a scientific publication please
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/ncbi_dustmasker_wrapper.xml Sat Jun 30 17:22:46 2018 -0400
b
@@ -86,7 +86,7 @@
 More information about dustmasker can be found in the `BLAST Command Line Applications User Manual`_.
 
 .. _BLAST Command Line Applications User Manual: https://www.ncbi.nlm.nih.gov/books/NBK279690/
-.. _DUST: http://www.ncbi.nlm.nih.gov/pubmed/16796549
+.. _DUST: https://www.ncbi.nlm.nih.gov/pubmed/16796549
 
 **References**
 
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/ncbi_macros.xml
--- a/tools/ncbi_blast_plus/ncbi_macros.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml Sat Jun 30 17:22:46 2018 -0400
[
b'@@ -1,15 +1,17 @@\n <macros>\n-    <token name="@WRAPPER_VERSION@">0.2.01</token>\n+    <token name="@WRAPPER_VERSION@">0.3.0</token>\n     <xml name="parallelism">\n         <!-- If job splitting is enabled, break up the query file into parts -->\n         <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />\n     </xml>\n+\n     <xml name="preamble">\n         <requirements>\n-            <requirement type="package" version="2.5.0">blast</requirement>\n+            <requirement type="package" version="2.7.1">blast</requirement>\n         </requirements>\n         <version_command>@BINARY@ -version</version_command>\n     </xml>\n+\n     <xml name="output_change_format">\n         <change_format>\n             <when input="output.out_format" value="0" format="txt"/>\n@@ -21,6 +23,7 @@\n             <when input="output.out_format" value="5" format="blastxml"/>\n         </change_format>\n     </xml>\n+\n     <xml name="input_out_format">\n         <conditional name="output">\n             <param name="out_format" argument="-outfmt" type="select" label="Output format">\n@@ -108,6 +111,168 @@\n             <when value="4 -html"/>\n         </conditional>\n     </xml>\n+\n+    <xml name="input_matrix_gapcosts">\n+        <conditional name="matrix_gapcosts">\n+            <param argument="-matrix" type="select" label="Scoring matrix and gap costs">\n+                <option value="" selected="true">Use Defaults</option>\n+                <option value="BLOSUM90">BLOSUM90</option>\n+                <option value="BLOSUM80">BLOSUM80</option>\n+                <option value="BLOSUM62">BLOSUM62</option>\n+                <option value="BLOSUM50">BLOSUM50</option>\n+                <option value="BLOSUM45">BLOSUM45</option>\n+                <option value="PAM250">PAM250</option>\n+                <option value="PAM70">PAM70</option>\n+                <option value="PAM30">PAM30</option>\n+            
</param>\n+            <when value="">\n+            <!-- do nothing -->\n+            </when>\n+            <when value="BLOSUM90">\n+                <param name="gap_costs" type="select" label="Gap Costs">\n+                    <option value="" selected="true">Use defaults</option>\n+                    <option value="-gapopen 9 -gapextend 2">Existence: 9  Extension: 2</option>\n+                    <option value="-gapopen 8 -gapextend 2">Existence: 8  Extension: 2</option>\n+                    <option value="-gapopen 7 -gapextend 2">Existence: 7  Extension: 2</option>\n+                    <option value="-gapopen 6 -gapextend 2">Existence: 6  Extension: 2</option>\n+                    <option value="-gapopen 11 -gapextend 1">Existence: 11  Extension: 1</option>\n+                    <option value="-gapopen 10 -gapextend 1">Existence: 10  Extension: 1</option>\n+                    <option value="-gapopen 9 -gapextend 1">Existence: 9  Extension: 1</option>\n+                </param>\n+\n+            </when>\n+            <when value="BLOSUM80">\n+                <param name="gap_costs" type="select" label="Gap Costs">\n+                    <option value="" selected="true">Use defaults</option>\n+                    <option value="-gapopen 8 -gapextend 2">Existence: 8  Extension: 2</option>\n+                    <option value="-gapopen 7 -gapextend 2">Existence: 7  Extension: 2</option>\n+                    <option value="-gapopen 6 -gapextend 2">Existence: 6  Extension: 2</option>\n+                    <option value="-gapopen 11 -gapextend 1">Existence: 11  Extension: 1</option>\n+                    <option value="-gapopen 10 -gapextend 1">Existence: 10  Extension: 1</option>\n+                    <option value="-gapopen 9 -gapextend 1">Existence: 9  Extension: 1</option>\n+                </param>\n+            </when>\n+            <when value="BLOSUM62">\n+                <param name="gap_costs" type="select" label="Gap Costs">\n+                    
<option value="" selected="true">Use defaults</option>\n+                    <option value="-g'..b'  ]]></token>\n     <xml name="blast_citations">\n         <citations>\n+            <citation type="doi">10.1093/nar/25.17.3389</citation>\n             <citation type="doi">10.1186/1471-2105-10-421</citation>\n             <citation type="doi">10.1186/s13742-015-0080-7</citation>\n         </citations>\n     </xml>\n-    <token name="@OUTPUT_FORMAT@">**Output format**\n+    <token name="@OUTPUT_FORMAT@"><![CDATA[\n+**Output format**\n \n Because Galaxy focuses on processing tabular data, the default output of this\n tool is tabular. The standard BLAST+ tabular output contains 12 columns:\n@@ -421,8 +686,8 @@\n ====== ========= ============================================\n Column NCBI name Description\n ------ --------- --------------------------------------------\n-     1 qseqid    Query Seq-id (ID of your sequence)\n-     2 sseqid    Subject Seq-id (ID of the database hit)\n+     1 qaccver   Query accession dot version\n+     2 saccver   Subject accession dot version (database hit)\n      3 pident    Percentage of identical matches\n      4 length    Alignment length\n      5 mismatch  Number of mismatches\n@@ -435,6 +700,12 @@\n     12 bitscore  Bit score\n ====== ========= ============================================\n \n+Until BLAST+ 2.5.0, the first two columns were ``qseqid`` and ``sseqid``,\n+which were usually strings containing multiple pipe-separated entries.\n+In BLAST+ 2.5.0, the first two columns became ``qacc`` and ``sacc``\n+(accession only), while in BLAST+ 2.6.0 this was changed again to use\n+``qaccver`` and ``saccver`` (accession dot version).\n+\n The BLAST+ tools can optionally output additional columns of information,\n but this takes longer to calculate. Many commonly used extra columns are\n included by selecting the extended tabular output. 
The extra columns are\n@@ -457,7 +728,7 @@\n     22 sseq          Aligned part of subject sequence\n     23 qlen          Query sequence length\n     24 slen          Subject sequence length\n-    25 salltitles    All subject title(s), separated by a \'&lt;&gt;\'\n+    25 salltitles    All subject title(s), separated by a \'<>\'\n ====== ============= ===========================================\n \n The third option is to customise the tabular output by selecting which\n@@ -472,8 +743,9 @@\n The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.\n The two query anchored outputs show a multiple sequence alignment between the query and all the matches,\n and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).\n-    </token>\n-    <token name="@FASTA_WARNING@">.. class:: warningmark\n+    ]]></token>\n+    <token name="@FASTA_WARNING@"><![CDATA[\n+.. class:: warningmark\n \n You can also search against a FASTA file of subject (target)\n sequences. This is *not* advised because it is slower (only one\n@@ -481,12 +753,26 @@\n searches (very small e-values which will look overly signficiant).\n In most cases you should instead turn the other FASTA file into a\n database first using *makeblastdb* and search against that.\n-    </token>\n-    <token name="@SEARCH_TIME_WARNING@">.. class:: warningmark\n+    ]]></token>\n+    <token name="@SEARCH_TIME_WARNING@"><![CDATA[\n+.. class:: warningmark\n \n **Note**. 
Database searches may take a substantial amount of time.\n For large input datasets it is advisable to allow overnight processing.\n \n -----\n-    </token>\n+    ]]></token>\n+    <token name="@CLI_OPTIONS@"><![CDATA[\n+**Advanced Options**\n+\n+For help with advanced options and their default values, visit the\n+NCBI BLAST\xc2\xae Command Line Applications User Manual, Appendices,\n+`Options for the command-line applications\n+<https://www.ncbi.nlm.nih.gov/books/NBK279684/#_appendices_Options_for_the_commandline_a_>`_.\n+\n+For amino acid substitution matrices, see `BLAST Substitution Matrices\n+<https://www.ncbi.nlm.nih.gov/books/NBK279684/#_appendices_BLAST_Substitution_Matrices_>`_ in the same\n+appendices.\n+\n+    ]]></token>\n </macros>\n'
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/ncbi_makeblastdb.xml
--- a/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/ncbi_makeblastdb.xml Sat Jun 30 17:22:46 2018 -0400
[
@@ -5,21 +5,30 @@
         <import>ncbi_macros.xml</import>
     </macros>
     <expand macro="preamble" />
-    <command detect_errors="aggressive" strict="true">
+    <command detect_errors="aggressive" strict="true"><![CDATA[
 python $__tool_directory__/check_no_duplicates.py
 ##First check for duplicates (since BLAST+ 2.2.28 fails to do so)
 ##and abort (via the ampersand ampersand trick) if any are found.
 #for i in $input_file#'${i}' #end for#
-&amp;&amp;
-makeblastdb -out '${os.path.join($outfile.files_path, "blastdb")}'
+&&
+##makeblastdb does not like input redirects of the sort
+##makeblastdb -in <(gunzip -c gzipped_fasta_file)
+##therefore we're cramming everything
+##into a single cat command below
+cat
+#for i in $input_file:
+    #if $i.is_of_type('fasta.gz'):
+        <(gunzip -c '${i}')
+    #else:
+        '${i}'
+    #end if
+#end for
+| makeblastdb -out '${os.path.join($outfile.files_path, "blastdb")}'
 $parse_seqids
 $hash_index
-## Single call to -in with multiple filenames space separated with outer quotes
-## (presumably any filenames with spaces would be a problem). Note this gives
-## some extra spaces, e.g. -in "file1 file2 file3 " but BLAST seems happy:
--in '#for i in $input_file#${i} #end for#'
+-in -
 #if $title:
--title '$title'
+-title '${title}'
 #else:
 ##Would default to being based on the cryptic Galaxy filenames, which is unhelpful
 -title 'BLAST Database'
@@ -46,8 +55,8 @@
 #end if
 ## --------------------------------------------------------------------
 ## Capture the stdout log information to the primary file (plain text):
-&gt; "$outfile"
-    </command>
+> '$outfile'
+    ]]></command>
     <inputs>
         <param argument="-dbtype" type="select" display="radio" label="Molecule type of input">
             <option value="prot">protein</option>
@@ -57,7 +66,7 @@
              NOTE Double check the new database would be self contained first
         -->
         <!-- Note this is a mandatory parameter - default should be most recent FASTA file -->
-        <param name="input_file" argument="-in" type="data" multiple="true" optional="false" format="fasta" label="Input FASTA files(s)" help="One or more FASTA files" />
+        <param name="input_file" argument="-in" type="data" multiple="true" optional="false" format="fasta,fasta.gz" label="Input FASTA files(s)" help="One or more FASTA files" />
         <param argument="-title" type="text" value="" label="Title for BLAST database" help="This is the database name shown in BLAST search output" />
         <param argument="-parse_seqids" type="boolean" truevalue="-parse_seqids" falsevalue="" checked="false" label="Parse the sequence identifiers" help="This is only advised if your FASTA file follows the NCBI naming conventions using pipe '|' symbols" />
         <param argument="-hash_index" type="boolean" truevalue="-hash_index" falsevalue="" checked="true" label="Enable the creation of sequence hash values" help="These hash values can then be used to quickly determine if a given sequence data exists in this BLAST database." />
@@ -110,7 +119,7 @@
             <param name="hash_index" value="true" />
             <output name="outfile" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp">
                 <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />
-                <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />
+                <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" compare="sim_size" delta="0" />
                 <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />
                 <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" />
                 <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" />
@@ -129,7 +138,7 @@
             <param name="taxid" value="9606" />
             <output name="outfile" compare="contains" file="four_human_proteins_taxid.fasta.log.txt" ftype="blastdbp">
                 <extra_files type="file" value="four_human_proteins_taxid.fasta.phr" name="blastdb.phr" />
-                <extra_files type="file" value="four_human_proteins_taxid.fasta.pin" name="blastdb.pin" lines_diff="2" />
+                <extra_files type="file" value="four_human_proteins_taxid.fasta.pin" name="blastdb.pin" compare="sim_size" delta="0" />
                 <extra_files type="file" value="four_human_proteins_taxid.fasta.psq" name="blastdb.psq" />
                 <extra_files type="file" value="four_human_proteins_taxid.fasta.pog" name="blastdb.pog" />
                 <extra_files type="file" value="four_human_proteins_taxid.fasta.phd" name="blastdb.phd" />
@@ -147,7 +156,7 @@
             <param name="mask_data_file" value="segmasker_four_human.maskinfo-asn1" ftype="maskinfo-asn1" />
             <output name="outfile" compare="contains" file="four_human_proteins.fasta.log.txt" ftype="blastdbp">
                 <extra_files type="file" value="four_human_proteins.fasta.phr" name="blastdb.phr" />
-                <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" lines_diff="2" />
+                <extra_files type="file" value="four_human_proteins.fasta.pin" name="blastdb.pin" compare="sim_size" delta="0" />
                 <extra_files type="file" value="four_human_proteins.fasta.psq" name="blastdb.psq" />
                 <extra_files type="file" value="four_human_proteins.fasta.pog" name="blastdb.pog" />
                 <extra_files type="file" value="four_human_proteins.fasta.phd" name="blastdb.phd" />
@@ -158,7 +167,7 @@
         </test>
         <test>
             <param name="dbtype" value="nucl" />
-            <param name="input_file" value="three_human_mRNA.fasta" ftype="fasta" />
+            <param name="input_file" value="three_human_mRNA.fasta.gz" ftype="fasta.gz" />
             <param name="title" value="Just 3 human mRNA sequences" />
             <param name="parse_seqids" value="" />
             <param name="hash_index" value="true" />
@@ -166,7 +175,7 @@
             <param name="taxid" value="9606" />
             <output name="outfile" compare="contains" file="three_human_mRNA.fasta.log.txt" ftype="blastdbn">
                 <extra_files type="file" value="three_human_mRNA.fasta.nhr" name="blastdb.nhr" />
-                <extra_files type="file" value="three_human_mRNA.fasta.nin" name="blastdb.nin" lines_diff="2" />
+                <extra_files type="file" value="three_human_mRNA.fasta.nin" name="blastdb.nin" compare="sim_size" delta="8" />
                 <extra_files type="file" value="three_human_mRNA.fasta.nsq" name="blastdb.nsq" />
                 <extra_files type="file" value="three_human_mRNA.fasta.nog" name="blastdb.nog" />
                 <extra_files type="file" value="three_human_mRNA.fasta.nhd" name="blastdb.nhd" />
@@ -184,15 +193,16 @@
 This is a wrapper for the NCBI BLAST+ tool 'makeblastdb', which is the
 replacement for the 'formatdb' tool in the NCBI 'legacy' BLAST suite.
 
+More information about makeblastdb can be found in the `BLAST Command Line Applications User Manual`_.
+
+.. _BLAST Command Line Applications User Manual: https://www.ncbi.nlm.nih.gov/books/NBK279690/
+
+
 <!--
 Applying masks to an existing BLAST database will not change the original database; a new database will be created.
 For this reason, it's best to apply all masks at once to minimize the number of unnecessary intermediate databases.
 -->
 
-**Documentation**
-
-https://www.ncbi.nlm.nih.gov/books/NBK279690/
-
 **References**
 
 If you use this Galaxy tool in work leading to a scientific publication please
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml Sat Jun 30 17:22:46 2018 -0400
b
@@ -23,6 +23,7 @@
 @ADV_FILTER_QUERY@
 @ADV_MAX_HITS@
 @ADV_QCOV_HSP_PERC@
+@ADV_WINDOW_SIZE@
 ## End of advanced options:
 #end if
     </command>
@@ -41,6 +42,7 @@
             <expand macro="input_max_hits" />
             <expand macro="input_parse_deflines" />
             <expand macro="input_qcov_hsp_perc" />
+            <expand macro="input_window_size" />
         </expand>
     </inputs>
     <outputs>
@@ -73,15 +75,15 @@
 (PSSMs) and are available for a number of domain collections including:
 
 *CDD* - NCBI curated meta-collection of domains, see
-http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml#NCBI_curated_domains
+https://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml#NCBI_curated_domains
 
 *Kog* - PSSMs from automatically aligned sequences and sequence
 fragments classified in the KOGs resource, the eukaryotic
-counterpart to COGs, see http://www.ncbi.nlm.nih.gov/COG/
+counterpart to COGs, see https://www.ncbi.nlm.nih.gov/COG/
 
 *Cog* - PSSMs from automatically aligned sequences and sequence
 fragments classified in the COGs resource, which focuses primarily
-on prokaryotes, see http://www.ncbi.nlm.nih.gov/COG/
+on prokaryotes, see https://www.ncbi.nlm.nih.gov/COG/
 
 *Pfam* - PSSMs from Pfam-A seed alignment database, see
 http://xfam.org/
@@ -90,11 +92,11 @@
 http://smart.embl-heidelberg.de/
 
 *Tigr* - PSSMs from TIGRFAM database of protein families, see
-http://www.jcvi.org/cms/research/projects/tigrfams/overview/
+ftp://ftp.jcvi.org/pub/data/TIGRFAMs/
 
 *Prk* - PSSMs from automatically aligned stable clusters in the
 Protein Clusters database, see
-http://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&amp;db=proteinclusters
+https://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&amp;db=proteinclusters
 
 The exact list of domain databases offered will depend on how your
 local Galaxy has been configured.
@@ -105,6 +107,10 @@
 
 -------
 
+@CLI_OPTIONS@
+
+-------
+
 **References**
 
 If you use this Galaxy tool in work leading to a scientific publication please
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml Sat Jun 30 17:22:46 2018 -0400
b
@@ -73,15 +73,15 @@
 (PSSMs) and are available for a number of domain collections including:
 
 *CDD* - NCBI curated meta-collection of domains, see
-http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml#NCBI_curated_domains
+https://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml#NCBI_curated_domains
 
 *Kog* - PSSMs from automatically aligned sequences and sequence
 fragments classified in the KOGs resource, the eukaryotic
-counterpart to COGs, see http://www.ncbi.nlm.nih.gov/COG/
+counterpart to COGs, see https://www.ncbi.nlm.nih.gov/COG/
 
 *Cog* - PSSMs from automatically aligned sequences and sequence
 fragments classified in the COGs resource, which focuses primarily
-on prokaryotes, see http://www.ncbi.nlm.nih.gov/COG/
+on prokaryotes, see https://www.ncbi.nlm.nih.gov/COG/
 
 *Pfam* - PSSMs from Pfam-A seed alignment database, see
 http://xfam.org/
@@ -90,11 +90,11 @@
 http://smart.embl-heidelberg.de/
 
 *Tigr* - PSSMs from TIGRFAM database of protein families, see
-http://www.jcvi.org/cms/research/projects/tigrfams/overview/
+ftp://ftp.jcvi.org/pub/data/TIGRFAMs/
 
 *Prk* - PSSMs from automatically aligned stable clusters in the
 Protein Clusters database, see
-http://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&amp;db=proteinclusters
+https://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&amp;db=proteinclusters
 
 The exact list of domain databases offered will depend on how your
 local Galaxy has been configured.
@@ -105,6 +105,10 @@
 
 -------
 
+@CLI_OPTIONS@
+
+-------
+
 **References**
 
 If you use this Galaxy tool in work leading to a scientific publication please
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/ncbi_segmasker_wrapper.xml Sat Jun 30 17:22:46 2018 -0400
b
@@ -88,7 +88,7 @@
 More information about segmasker can be found in the `BLAST Command Line Applications User Manual`_.
 
 .. _BLAST Command Line Applications User Manual: https://www.ncbi.nlm.nih.gov/books/NBK279690/
-.. _SEG: http://www.ncbi.nlm.nih.gov/pubmed/8743706
+.. _SEG: https://www.ncbi.nlm.nih.gov/pubmed/8743706
 
 **References**
 
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Sat Jun 30 17:22:46 2018 -0400
b
@@ -18,14 +18,17 @@
 @THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
 -db_gencode $adv_opts.db_gencode
--matrix $adv_opts.matrix
 @ADV_FILTER_QUERY@
 @ADV_MAX_HITS@
 @ADV_WORD_SIZE@
+@ADV_MATRIX_GAPCOSTS@
 ##Ungapped disabled for now - see comments below
 ##$adv_opts.ungapped
 @ADV_ID_LIST_FILTER@
 @ADV_QCOV_HSP_PERC@
+@ADV_WINDOW_SIZE@
+@ADV_THRESHOLD@
+@ADV_COMP_BASED_STATS@
 ## End of advanced options:
 #end if
     </command>
@@ -44,7 +47,7 @@
 
             <!-- Could use a select (yes, no, other) where other allows setting 'window locut hicut' -->
             <expand macro="input_filter_query_default_true" />
-            <expand macro="input_scoring_matrix" />
+            <expand macro="input_matrix_gapcosts" />
             <expand macro="input_max_hits" />
             <expand macro="input_word_size" />
             <!--
@@ -56,6 +59,9 @@
             <expand macro="input_parse_deflines" />
             <expand macro="advanced_optional_id_files" />
             <expand macro="input_qcov_hsp_perc" />
+            <expand macro="input_window_size" />
+            <expand macro="input_threshold" />
+            <expand macro="input_comp_based_stats" />
         </expand>
     </inputs>
     <outputs>
@@ -75,7 +81,7 @@
             <param name="filter_query" value="false" />
             <param name="matrix" value="BLOSUM80" />
             <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
+            <param name="word_size" value="" />
             <param name="parse_deflines" value="false" />
             <output name="output1" file="tblastn_four_human_vs_rhodopsin.xml" ftype="blastxml" />
         </test>
@@ -90,7 +96,7 @@
             <param name="filter_query" value="false" />
             <param name="matrix" value="BLOSUM80" />
             <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
+            <param name="word_size" value="" />
             <param name="parse_deflines" value="false" />
             <output name="output1" file="tblastn_four_human_vs_rhodopsin_ext.tabular" ftype="tabular" />
         </test>
@@ -105,7 +111,7 @@
             <param name="filter_query" value="false" />
             <param name="matrix" value="BLOSUM80" />
             <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
+            <param name="word_size" value="" />
             <param name="parse_deflines" value="false" />
             <output name="output1" file="tblastn_four_human_vs_rhodopsin.tabular" ftype="tabular" />
         </test>
@@ -122,7 +128,7 @@
             <param name="filter_query" value="false" />
             <param name="matrix" value="BLOSUM80" />
             <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
+            <param name="word_size" value="" />
             <param name="parse_deflines" value="true" />
             <output name="output1" file="tblastn_four_human_vs_rhodopsin_deflines.tabular" ftype="tabular" />
         </test>
@@ -138,7 +144,7 @@
             <param name="filter_query" value="false" />
             <param name="matrix" value="BLOSUM80" />
             <param name="max_hits" value="0" />
-            <param name="word_size" value="0" />
+            <param name="word_size" value="" />
             <param name="parse_deflines" value="false" />
             <output name="output1" file="tblastn_four_human_vs_rhodopsin.html" ftype="html" lines_diff="8" />
         </test>
@@ -154,11 +160,15 @@
 
 @FASTA_WARNING@
 
------
+------
 
 @OUTPUT_FORMAT@
 
--------
+------
+
+@CLI_OPTIONS@
+
+------
 
 **References**
 
b
diff -r 6f386c5dc4fb -r 31e517610e1f tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Mon Sep 18 06:21:27 2017 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml Sat Jun 30 17:22:46 2018 -0400
b
@@ -17,14 +17,15 @@
 @BLAST_OUTPUT@
 @THREADS@
 #if $adv_opts.adv_opts_selector=="advanced":
--db_gencode $adv_opts.db_gencode
-$adv_opts.strand
--matrix $adv_opts.matrix
-@ADV_FILTER_QUERY@
-@ADV_MAX_HITS@
-@ADV_WORD_SIZE@
-@ADV_ID_LIST_FILTER@
-@ADV_QCOV_HSP_PERC@
+    -db_gencode $adv_opts.db_gencode
+    $adv_opts.strand
+    @ADV_MATRIX@
+    @ADV_FILTER_QUERY@
+    @ADV_MAX_HITS@
+    @ADV_WORD_SIZE@
+    @ADV_ID_LIST_FILTER@
+    @ADV_QCOV_HSP_PERC@
+    @ADV_THRESHOLD@
 ## End of advanced options:
 #end if
     </command>
@@ -49,6 +50,7 @@
             <expand macro="input_parse_deflines" />
             <expand macro="advanced_optional_id_files" />
             <expand macro="input_qcov_hsp_perc" />
+            <expand macro="input_threshold" />
         </expand>
     </inputs>
     <outputs>
@@ -85,6 +87,10 @@
 
 -------
 
+@CLI_OPTIONS@
+
+-------
+
 **References**
 
 If you use this Galaxy tool in work leading to a scientific publication please