Repository 'ncbi_blast_plus'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus

Changeset 26:2889433c7ae1 (2019-07-20)
Previous changeset 25:e25d3acf6e68 (2018-10-23) Next changeset 27:6f8ea4b9a2c4 (2020-09-09)
Commit message:
v0.3.3 - fixed legacy dependency definition
modified:
test-data/cd00003_and_cd00008.pin
tools/ncbi_blast_plus/README.rst
tools/ncbi_blast_plus/blastxml_to_tabular.py
tools/ncbi_blast_plus/check_no_duplicates.py
tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
tools/ncbi_blast_plus/ncbi_macros.xml
tools/ncbi_blast_plus/ncbi_makeprofiledb.xml
tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
tools/ncbi_blast_plus/repository_dependencies.xml
tools/ncbi_blast_plus/tool_dependencies.xml
added:
test-data/rhodopsin_nucs.blastdbcmd.txt
b
diff -r e25d3acf6e68 -r 2889433c7ae1 test-data/cd00003_and_cd00008.pin
b
Binary file test-data/cd00003_and_cd00008.pin has changed
b
diff -r e25d3acf6e68 -r 2889433c7ae1 test-data/rhodopsin_nucs.blastdbcmd.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rhodopsin_nucs.blastdbcmd.txt Sat Jul 20 18:36:36 2019 -0400
b
@@ -0,0 +1,7 @@
+gi|57163782|ref|NM_001009242.1|
+gi|2734705|gb|U59921.1|BBU59921 "1 -" +
+gi|283855845|gb|GQ290303.1| 1-4301 +
+gi|283855822|gb|GQ290312.1| "1-983"
+gi|18148870|dbj|AB062417.1| "1 -" +
+gi|12583664|dbj|AB043817.1| "1--"
+
b
diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/README.rst
--- a/tools/ncbi_blast_plus/README.rst Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/README.rst Sat Jul 20 18:36:36 2019 -0400
b
@@ -264,6 +264,10 @@
           output format it must be mapped to different command line arguments.
         - Extend gzipped query support to all the command line tools.
         - Workaround for gzipped support under Galaxy release 16.01 or older.
+v0.3.2  - Fixed incomplete ``@CLI_OPTIONS@`` macro in the help text for the
+          ``tblastn`` and ``blastdbcmd`` wrappers.
+v0.3.3  - Fixed ``tool_dependencies.xml`` to use BLAST+ 2.7.1 (useful only for
+          older Galaxy instances - we recommend conda for dependencies now).
 ======= ======================================================================
 
 
b
diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/blastxml_to_tabular.py
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Sat Jul 20 18:36:36 2019 -0400
[
b'@@ -81,12 +81,14 @@\n else:\n     from galaxy import eggs  # noqa - ignore flake8 F401\n     import pkg_resources\n+\n     pkg_resources.require("elementtree")\n     from elementtree import ElementTree\n \n if len(sys.argv) == 4 and sys.argv[3] in ["std", "x22", "ext"]:\n     # False positive if user really has a BLAST XML file called \'std\' or \'ext\'...\n-    sys.exit("""ERROR: The script API has changed, sorry.\n+    sys.exit(\n+        """ERROR: The script API has changed, sorry.\n \n Instead of the old style:\n \n@@ -99,7 +101,8 @@\n For more information, use:\n \n $ python blastxml_to_tabular.py -h\n-""")\n+"""\n+    )\n \n usage = """usage: %prog [options] blastxml[,...]\n \n@@ -113,16 +116,29 @@\n extended column names are supported.\n """\n parser = OptionParser(usage=usage)\n-parser.add_option(\'-o\', \'--output\', dest=\'output\', default=None,\n-                  help=\'output filename (defaults to stdout)\',\n-                  metavar="FILE")\n-parser.add_option("-c", "--columns", dest="columns", default=\'std\',\n-                  help="[std|ext|col1,col2,...] standard 12 columns, extended 25 columns, or list of column names")\n+parser.add_option(\n+    "-o",\n+    "--output",\n+    dest="output",\n+    default=None,\n+    help="output filename (defaults to stdout)",\n+    metavar="FILE",\n+)\n+parser.add_option(\n+    "-c",\n+    "--columns",\n+    dest="columns",\n+    default="std",\n+    help="[std|ext|col1,col2,...] 
standard 12 columns, "\n+    "extended 25 columns, or list of column names",\n+)\n (options, args) = parser.parse_args()\n \n-colnames = (\'qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,\'\n-            \'sstart,send,evalue,bitscore,sallseqid,score,nident,positive,\'\n-            \'gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles\').split(\',\')\n+colnames = (\n+    "qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,"\n+    "sstart,send,evalue,bitscore,sallseqid,score,nident,positive,"\n+    "gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles"\n+).split(",")\n \n if len(args) < 1:\n     sys.exit("ERROR: No BLASTXML input files given; run with --help to see options.")\n@@ -148,7 +164,9 @@\n     assert set(colnames).issuperset(cols), cols\n     if not cols:\n         sys.exit("No columns selected!")\n-    extended = max(colnames.index(c) for c in cols) >= 12  # Do we need any higher columns?\n+    extended = (\n+        max(colnames.index(c) for c in cols) >= 12\n+    )  # Do we need any higher columns?\n del out_fmt\n \n for in_file in args:\n@@ -156,15 +174,15 @@\n         sys.exit("Input BLAST XML file not found: %s" % in_file)\n \n \n-re_default_query_id = re.compile("^Query_\\d+$")\n-assert re_default_query_id.match("Query_101")\n-assert not re_default_query_id.match("Query_101a")\n-assert not re_default_query_id.match("MyQuery_101")\n-re_default_subject_id = re.compile("^Subject_\\d+$")\n-assert re_default_subject_id.match("Subject_1")\n-assert not re_default_subject_id.match("Subject_")\n-assert not re_default_subject_id.match("Subject_12a")\n-assert not re_default_subject_id.match("TheSubject_1")\n+re_default_query_id = re.compile(r"^Query_\\d+$")\n+assert re_default_query_id.match(r"Query_101")\n+assert not re_default_query_id.match(r"Query_101a")\n+assert not re_default_query_id.match(r"MyQuery_101")\n+re_default_subject_id = re.compile(r"^Subject_\\d+$")\n+assert re_default_subject_id.match(r"Subject_1")\n+assert not 
re_default_subject_id.match(r"Subject_")\n+assert not re_default_subject_id.match(r"Subject_12a")\n+assert not re_default_subject_id.match(r"TheSubject_1")\n \n \n def convert(blastxml_filename, output_handle):\n@@ -213,7 +231,8 @@\n                 # <Hit_accession>P56514</Hit_accession>\n                 # or,\n                 # <Hit_id>Subject_1</Hit_id>\n-                # <Hit_def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</Hit_def>\n+                # <Hit_def>gi|57163783|ref|NP_001009242.1|\n+                # rhodopsin [Felis catus]</Hit_def>\n                 # <Hit_accession>Subject_1</Hit_accession>\n                 #\n                 # apparently '..b'dtext("Hsp_hit-from"),  # sstart,\n+                        hsp.findtext("Hsp_hit-to"),  # send,\n+                        evalue,  # hsp.findtext("Hsp_evalue") in scientific notation\n+                        bitscore,  # hsp.findtext("Hsp_bit-score") rounded\n+                    ]\n \n                     if extended:\n                         try:\n-                            sallseqid = ";".join(name.split(None, 1)[0] for name in hit_def.split(" >"))\n-                            salltitles = "<>".join(name.split(None, 1)[1] for name in hit_def.split(" >"))\n+                            sallseqid = ";".join(\n+                                name.split(None, 1)[0] for name in hit_def.split(" >")\n+                            )\n+                            salltitles = "<>".join(\n+                                name.split(None, 1)[1] for name in hit_def.split(" >")\n+                            )\n                         except IndexError as e:\n-                            sys.exit("Problem splitting multuple hits?\\n%r\\n--> %s" % (hit_def, e))\n+                            sys.exit(\n+                                "Problem splitting multuple hits?\\n%r\\n--> %s"\n+                                % (hit_def, e)\n+                            )\n                         # 
print(hit_def, "-->", sallseqid)\n                         positive = hsp.findtext("Hsp_positive")\n                         ppos = "%0.2f" % (100 * float(positive) / float(length))\n                         qframe = hsp.findtext("Hsp_query-frame")\n                         sframe = hsp.findtext("Hsp_hit-frame")\n                         if blast_program == "blastp":\n-                            # Probably a bug in BLASTP that they use 0 or 1 depending on format\n+                            # Probably a bug in BLASTP that they use 0 or 1\n+                            # depending on format\n                             if qframe == "0":\n                                 qframe = "1"\n                             if sframe == "0":\n                                 sframe = "1"\n                         slen = int(hit.findtext("Hit_len"))\n-                        values.extend([sallseqid,\n-                                       hsp.findtext("Hsp_score"),  # score,\n-                                       nident,\n-                                       positive,\n-                                       hsp.findtext("Hsp_gaps"),  # gaps,\n-                                       ppos,\n-                                       qframe,\n-                                       sframe,\n-                                       # NOTE - for blastp, XML shows original seq, tabular uses XXX masking\n-                                       q_seq,\n-                                       h_seq,\n-                                       str(qlen),\n-                                       str(slen),\n-                                       salltitles,\n-                                       ])\n+                        values.extend(\n+                            [\n+                                sallseqid,\n+                                hsp.findtext("Hsp_score"),  # score,\n+                                nident,\n+                                positive,\n+              
                  hsp.findtext("Hsp_gaps"),  # gaps,\n+                                ppos,\n+                                qframe,\n+                                sframe,\n+                                # NOTE - for blastp, XML shows original seq,\n+                                # tabular uses XXX masking\n+                                q_seq,\n+                                h_seq,\n+                                str(qlen),\n+                                str(slen),\n+                                salltitles,\n+                            ]\n+                        )\n                     if cols:\n                         # Only a subset of the columns are needed\n                         values = [values[colnames.index(c)] for c in cols]\n'
b
diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/check_no_duplicates.py
--- a/tools/ncbi_blast_plus/check_no_duplicates.py Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/check_no_duplicates.py Sat Jul 20 18:36:36 2019 -0400
[
@@ -31,7 +31,7 @@
     if not magic:
         # Empty file, special case
         continue
-    elif magic == b'\x1f\x8b':
+    elif magic == b"\x1f\x8b":
         # Gzipped
         handle = gzip.open(filename, "rt")
     elif magic[0:1] == b">":
b
diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Sat Jul 20 18:36:36 2019 -0400
b
@@ -8,7 +8,7 @@
     <command detect_errors="aggressive" strict="true">
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
-blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}"
+blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path.replace(',',' ')}"
 
 ##TODO: What about -ctrl_a and -target_only as advanced options?
 
@@ -55,7 +55,7 @@
               <option value="prompt">User entered</option>
             </param>
             <when value="file">
-                <param name="entries" argument="-entry_batch" type="data" format="txt,tabular" label="Sequence identifier(s)" help="Plain text file with one ID per line (i.e. single column tabular file)"/>
+                <param name="entries" argument="-entry_batch" type="data" format="txt,tabular" label="Sequence identifier(s)" help="Plain text file with one ID per line, optionally with space separated range, strand, and algorithm."/>
             </when>
             <when value="prompt">
                 <param name="entries" argument="-entry" type="text" optional="false" area="true" size="10x30" label="Sequence identifier(s)" help="Comma or new line separated list"/>
@@ -88,6 +88,15 @@
             <output name="seq" file="rhodopsin_nucs.no_gi.fasta" ftype="fasta" />
         </test>
         <test>
+            <!-- This uses various start end frame combinations but all recover full sequence -->
+            <param name="db_opts|db_type" value="nucl" />
+            <param name="db_opts|database" value="rhodopsin_nucs" />
+            <param name="id_opts|id_type" value="file" />
+            <param name="id_opts|entries" value="rhodopsin_nucs.blastdbcmd.txt" ftype="txt" />
+            <param name="outfmt" value="original" />
+            <output name="seq" file="rhodopsin_nucs.no_gi.fasta" ftype="fasta" />
+        </test>
+        <test>
             <param name="db_opts|db_type" value="nucl" />
             <param name="db_opts|database" value="rhodopsin_nucs" />
             <param name="id_opts|id_type" value="prompt" />
@@ -111,6 +120,12 @@
 Extracts FASTA formatted sequences from a BLAST database
 using the NCBI BLAST+ blastdbcmd command line tool.
 
+When giving a text file of entries, use one line per sequence.
+Optional values should be space separated - the simplest syntax
+is ``identifier start-end`` (where ``end`` can be just ``-``),
+or ``identifier start-end strand`` (where the strand is given as
+either ``+`` or ``-``).
+
 .. class:: warningmark
 
 **BLAST assigned identifiers**
@@ -131,7 +146,7 @@
 
 -------
 
-@CLI_OPTIONS
+@CLI_OPTIONS@
 
 -------
 
b
diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/ncbi_macros.xml
--- a/tools/ncbi_blast_plus/ncbi_macros.xml Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml Sat Jul 20 18:36:36 2019 -0400
b
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@WRAPPER_VERSION@">0.3.1</token>
+    <token name="@WRAPPER_VERSION@">0.3.3</token>
     <xml name="parallelism">
         <!-- If job splitting is enabled, break up the query file into parts -->
         <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />
b
diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/ncbi_makeprofiledb.xml
--- a/tools/ncbi_blast_plus/ncbi_makeprofiledb.xml Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/ncbi_makeprofiledb.xml Sat Jul 20 18:36:36 2019 -0400
b
@@ -90,7 +90,7 @@
             <param name="contain_pssm_scores_type" value="yes" />
             <output name="outfile" file="empty_file.dat" ftype="blastdbd" >
                 <extra_files type="file" value="cd00003_and_cd00008.phr" name="blastdb.phr" />
-                <extra_files type="file" value="cd00003_and_cd00008.pin" name="blastdb.pin" compare="sim_size" delta="0" />
+                <extra_files type="file" value="cd00003_and_cd00008.pin" name="blastdb.pin" compare="sim_size" delta="8" />
                 <extra_files type="file" value="cd00003_and_cd00008.psq" name="blastdb.psq" />
                 <extra_files type="file" value="cd00003_and_cd00008.freq" name="blastdb.freq" />
                 <extra_files type="file" value="cd00003_and_cd00008.loo" name="blastdb.loo" />
b
diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Sat Jul 20 18:36:36 2019 -0400
b
@@ -166,7 +166,7 @@
 
 ------
 
-@CLI_OPTIONS
+@CLI_OPTIONS@
 
 ------
 
b
diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/repository_dependencies.xml
--- a/tools/ncbi_blast_plus/repository_dependencies.xml Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/repository_dependencies.xml Sat Jul 20 18:36:36 2019 -0400
b
@@ -1,4 +1,4 @@
-<?xml version="1.0"?>
+<?xml version="1.0" ?>
 <repositories description="This requires the BLAST datatype definitions (e.g. the BLAST XML format).">
-    <repository changeset_revision="01b38f20197e" name="blast_datatypes" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-</repositories>
+    <repository changeset_revision="01b38f20197e" name="blast_datatypes" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu"/>
+</repositories>
\ No newline at end of file
b
diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/tool_dependencies.xml
--- a/tools/ncbi_blast_plus/tool_dependencies.xml Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/tool_dependencies.xml Sat Jul 20 18:36:36 2019 -0400
b
@@ -1,6 +1,6 @@
-<?xml version="1.0"?>
+<?xml version="1.0" ?>
 <tool_dependency>
-    <package name="blast" version="2.5.0">
-        <repository changeset_revision="5dd2b68c7d04" name="package_blast_plus_2_5_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="blast" version="2.7.1">
+        <repository changeset_revision="2e9109a8924f" name="package_blast_plus_2_7_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu"/>
     </package>
-</tool_dependency>
+</tool_dependency>
\ No newline at end of file