Galaxy |

Changeset 26:2889433c7ae1 (2019-07-20)

Previous changeset 25:e25d3acf6e68 (2018-10-23) Next changeset 27:6f8ea4b9a2c4 (2020-09-09)

Commit message:
v0.3.3 - fixed legacy dependency definition

modified:
test-data/cd00003_and_cd00008.pin
tools/ncbi_blast_plus/README.rst
tools/ncbi_blast_plus/blastxml_to_tabular.py
tools/ncbi_blast_plus/check_no_duplicates.py
tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
tools/ncbi_blast_plus/ncbi_macros.xml
tools/ncbi_blast_plus/ncbi_makeprofiledb.xml
tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
tools/ncbi_blast_plus/repository_dependencies.xml
tools/ncbi_blast_plus/tool_dependencies.xml

added:
test-data/rhodopsin_nucs.blastdbcmd.txt

diff -r e25d3acf6e68 -r 2889433c7ae1 test-data/cd00003_and_cd00008.pin

Binary file test-data/cd00003_and_cd00008.pin has changed

diff -r e25d3acf6e68 -r 2889433c7ae1 test-data/rhodopsin_nucs.blastdbcmd.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rhodopsin_nucs.blastdbcmd.txt Sat Jul 20 18:36:36 2019 -0400

@@ -0,0 +1,7 @@
+gi|57163782|ref|NM_001009242.1|
+gi|2734705|gb|U59921.1|BBU59921 "1 -" +
+gi|283855845|gb|GQ290303.1| 1-4301 +
+gi|283855822|gb|GQ290312.1| "1-983"
+gi|18148870|dbj|AB062417.1| "1 -" +
+gi|12583664|dbj|AB043817.1| "1--"
+

diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/README.rst
--- a/tools/ncbi_blast_plus/README.rst Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/README.rst Sat Jul 20 18:36:36 2019 -0400

@@ -264,6 +264,10 @@
           output format it must be mapped to different command line arguments.
         - Extend gzipped query support to all the command line tools.
         - Workaround for gzipped support under Galaxy release 16.01 or older.
+v0.3.2  - Fixed incomplete ``@CLI_OPTIONS@`` macro in the help text for the
+          ``tblastn`` and ``blastdbcmd`` wrappers.
+v0.3.3  - Fixed ``tool_dependencies.xml`` to use BLAST+ 2.7.1 (useful only for
+          older Galaxy instances - we recommend conda for dependencies now).
======= ======================================================================

diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/blastxml_to_tabular.py
--- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Sat Jul 20 18:36:36 2019 -0400

[

b'@@ -81,12 +81,14 @@\n else:\n from galaxy import eggs # noqa - ignore flake8 F401\n import pkg_resources\n+\n pkg_resources.require("elementtree")\n from elementtree import ElementTree\n \n if len(sys.argv) == 4 and sys.argv[3] in ["std", "x22", "ext"]:\n # False positive if user really has a BLAST XML file called \'std\' or \'ext\'...\n- sys.exit("""ERROR: The script API has changed, sorry.\n+ sys.exit(\n+ """ERROR: The script API has changed, sorry.\n \n Instead of the old style:\n \n@@ -99,7 +101,8 @@\n For more information, use:\n \n $ python blastxml_to_tabular.py -h\n-""")\n+"""\n+ )\n \n usage = """usage: %prog [options] blastxml[,...]\n \n@@ -113,16 +116,29 @@\n extended column names are supported.\n """\n parser = OptionParser(usage=usage)\n-parser.add_option(\'-o\', \'--output\', dest=\'output\', default=None,\n- help=\'output filename (defaults to stdout)\',\n- metavar="FILE")\n-parser.add_option("-c", "--columns", dest="columns", default=\'std\',\n- help="[std|ext|col1,col2,...] standard 12 columns, extended 25 columns, or list of column names")\n+parser.add_option(\n+ "-o",\n+ "--output",\n+ dest="output",\n+ default=None,\n+ help="output filename (defaults to stdout)",\n+ metavar="FILE",\n+)\n+parser.add_option(\n+ "-c",\n+ "--columns",\n+ dest="columns",\n+ default="std",\n+ help="[std|ext|col1,col2,...] 
standard 12 columns, "\n+ "extended 25 columns, or list of column names",\n+)\n (options, args) = parser.parse_args()\n \n-colnames = (\'qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,\'\n- \'sstart,send,evalue,bitscore,sallseqid,score,nident,positive,\'\n- \'gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles\').split(\',\')\n+colnames = (\n+ "qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,"\n+ "sstart,send,evalue,bitscore,sallseqid,score,nident,positive,"\n+ "gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles"\n+).split(",")\n \n if len(args) < 1:\n sys.exit("ERROR: No BLASTXML input files given; run with --help to see options.")\n@@ -148,7 +164,9 @@\n assert set(colnames).issuperset(cols), cols\n if not cols:\n sys.exit("No columns selected!")\n- extended = max(colnames.index(c) for c in cols) >= 12 # Do we need any higher columns?\n+ extended = (\n+ max(colnames.index(c) for c in cols) >= 12\n+ ) # Do we need any higher columns?\n del out_fmt\n \n for in_file in args:\n@@ -156,15 +174,15 @@\n sys.exit("Input BLAST XML file not found: %s" % in_file)\n \n \n-re_default_query_id = re.compile("^Query_\\d+$")\n-assert re_default_query_id.match("Query_101")\n-assert not re_default_query_id.match("Query_101a")\n-assert not re_default_query_id.match("MyQuery_101")\n-re_default_subject_id = re.compile("^Subject_\\d+$")\n-assert re_default_subject_id.match("Subject_1")\n-assert not re_default_subject_id.match("Subject_")\n-assert not re_default_subject_id.match("Subject_12a")\n-assert not re_default_subject_id.match("TheSubject_1")\n+re_default_query_id = re.compile(r"^Query_\\d+$")\n+assert re_default_query_id.match(r"Query_101")\n+assert not re_default_query_id.match(r"Query_101a")\n+assert not re_default_query_id.match(r"MyQuery_101")\n+re_default_subject_id = re.compile(r"^Subject_\\d+$")\n+assert re_default_subject_id.match(r"Subject_1")\n+assert not re_default_subject_id.match(r"Subject_")\n+assert not 
re_default_subject_id.match(r"Subject_12a")\n+assert not re_default_subject_id.match(r"TheSubject_1")\n \n \n def convert(blastxml_filename, output_handle):\n@@ -213,7 +231,8 @@\n # <Hit_accession>P56514</Hit_accession>\n # or,\n # <Hit_id>Subject_1</Hit_id>\n- # <Hit_def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</Hit_def>\n+ # <Hit_def>gi|57163783|ref|NP_001009242.1|\n+ # rhodopsin [Felis catus]</Hit_def>\n # <Hit_accession>Subject_1</Hit_accession>\n #\n # apparently '..b'dtext("Hsp_hit-from"), # sstart,\n+ hsp.findtext("Hsp_hit-to"), # send,\n+ evalue, # hsp.findtext("Hsp_evalue") in scientific notation\n+ bitscore, # hsp.findtext("Hsp_bit-score") rounded\n+ ]\n \n if extended:\n try:\n- sallseqid = ";".join(name.split(None, 1)[0] for name in hit_def.split(" >"))\n- salltitles = "<>".join(name.split(None, 1)[1] for name in hit_def.split(" >"))\n+ sallseqid = ";".join(\n+ name.split(None, 1)[0] for name in hit_def.split(" >")\n+ )\n+ salltitles = "<>".join(\n+ name.split(None, 1)[1] for name in hit_def.split(" >")\n+ )\n except IndexError as e:\n- sys.exit("Problem splitting multuple hits?\\n%r\\n--> %s" % (hit_def, e))\n+ sys.exit(\n+ "Problem splitting multuple hits?\\n%r\\n--> %s"\n+ % (hit_def, e)\n+ )\n # print(hit_def, "-->", sallseqid)\n positive = hsp.findtext("Hsp_positive")\n ppos = "%0.2f" % (100 * float(positive) / float(length))\n qframe = hsp.findtext("Hsp_query-frame")\n sframe = hsp.findtext("Hsp_hit-frame")\n if blast_program == "blastp":\n- # Probably a bug in BLASTP that they use 0 or 1 depending on format\n+ # Probably a bug in BLASTP that they use 0 or 1\n+ # depending on format\n if qframe == "0":\n qframe = "1"\n if sframe == "0":\n sframe = "1"\n slen = int(hit.findtext("Hit_len"))\n- values.extend([sallseqid,\n- hsp.findtext("Hsp_score"), # score,\n- nident,\n- positive,\n- hsp.findtext("Hsp_gaps"), # gaps,\n- ppos,\n- qframe,\n- sframe,\n- # NOTE - for blastp, XML shows original seq, tabular uses XXX masking\n- q_seq,\n- 
h_seq,\n- str(qlen),\n- str(slen),\n- salltitles,\n- ])\n+ values.extend(\n+ [\n+ sallseqid,\n+ hsp.findtext("Hsp_score"), # score,\n+ nident,\n+ positive,\n+ hsp.findtext("Hsp_gaps"), # gaps,\n+ ppos,\n+ qframe,\n+ sframe,\n+ # NOTE - for blastp, XML shows original seq,\n+ # tabular uses XXX masking\n+ q_seq,\n+ h_seq,\n+ str(qlen),\n+ str(slen),\n+ salltitles,\n+ ]\n+ )\n if cols:\n # Only a subset of the columns are needed\n values = [values[colnames.index(c)] for c in cols]\n'

diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/check_no_duplicates.py
--- a/tools/ncbi_blast_plus/check_no_duplicates.py Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/check_no_duplicates.py Sat Jul 20 18:36:36 2019 -0400

[

@@ -31,7 +31,7 @@
     if not magic:
         # Empty file, special case
         continue
-    elif magic == b'\x1f\x8b':
+    elif magic == b"\x1f\x8b":
         # Gzipped
         handle = gzip.open(filename, "rt")
     elif magic[0:1] == b">":

diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Sat Jul 20 18:36:36 2019 -0400

@@ -8,7 +8,7 @@
     <command detect_errors="aggressive" strict="true">
## The command is a Cheetah template which allows some Python based syntax.
## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
-blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}"
+blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path.replace(',',' ')}"

##TODO: What about -ctrl_a and -target_only as advanced options?

@@ -55,7 +55,7 @@
               <option value="prompt">User entered</option>
             </param>
             <when value="file">
-                <param name="entries" argument="-entry_batch" type="data" format="txt,tabular" label="Sequence identifier(s)" help="Plain text file with one ID per line (i.e. single column tabular file)"/>
+                <param name="entries" argument="-entry_batch" type="data" format="txt,tabular" label="Sequence identifier(s)" help="Plain text file with one ID per line, optionally with space separated range, strand, and algorithm."/>
             </when>
             <when value="prompt">
                 <param name="entries" argument="-entry" type="text" optional="false" area="true" size="10x30" label="Sequence identifier(s)" help="Comma or new line separated list"/>
@@ -88,6 +88,15 @@
             <output name="seq" file="rhodopsin_nucs.no_gi.fasta" ftype="fasta" />
         </test>
         <test>
+            
+            <param name="db_opts|db_type" value="nucl" />
+            <param name="db_opts|database" value="rhodopsin_nucs" />
+            <param name="id_opts|id_type" value="file" />
+            <param name="id_opts|entries" value="rhodopsin_nucs.blastdbcmd.txt" ftype="txt" />
+            <param name="outfmt" value="original" />
+            <output name="seq" file="rhodopsin_nucs.no_gi.fasta" ftype="fasta" />
+        </test>
+        <test>
             <param name="db_opts|db_type" value="nucl" />
             <param name="db_opts|database" value="rhodopsin_nucs" />
             <param name="id_opts|id_type" value="prompt" />
@@ -111,6 +120,12 @@
Extracts FASTA formatted sequences from a BLAST database
using the NCBI BLAST+ blastdbcmd command line tool.

+When giving a text file of entries, use one line per sequence.
+Optional values should be space separated - the simplest syntax
+is ``identifier start-end`` (where ``end`` can be just ``-``),
+or ``identifier start-end strand`` (where the strand is given as
+either ``+`` or ``-``).
+
.. class:: warningmark

**BLAST assigned identifiers**
@@ -131,7 +146,7 @@

-------

-@CLI_OPTIONS
+@CLI_OPTIONS@

-------

diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/ncbi_macros.xml
--- a/tools/ncbi_blast_plus/ncbi_macros.xml Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/ncbi_macros.xml Sat Jul 20 18:36:36 2019 -0400

@@ -1,5 +1,5 @@
<macros>
-    <token name="@WRAPPER_VERSION@">0.3.1</token>
+    <token name="@WRAPPER_VERSION@">0.3.3</token>
     <xml name="parallelism">
         
         <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />

diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/ncbi_makeprofiledb.xml
--- a/tools/ncbi_blast_plus/ncbi_makeprofiledb.xml Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/ncbi_makeprofiledb.xml Sat Jul 20 18:36:36 2019 -0400

@@ -90,7 +90,7 @@
             <param name="contain_pssm_scores_type" value="yes" />
             <output name="outfile" file="empty_file.dat" ftype="blastdbd" >
                 <extra_files type="file" value="cd00003_and_cd00008.phr" name="blastdb.phr" />
-                <extra_files type="file" value="cd00003_and_cd00008.pin" name="blastdb.pin" compare="sim_size" delta="0" />
+                <extra_files type="file" value="cd00003_and_cd00008.pin" name="blastdb.pin" compare="sim_size" delta="8" />
                 <extra_files type="file" value="cd00003_and_cd00008.psq" name="blastdb.psq" />
                 <extra_files type="file" value="cd00003_and_cd00008.freq" name="blastdb.freq" />
                 <extra_files type="file" value="cd00003_and_cd00008.loo" name="blastdb.loo" />

diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml
--- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Sat Jul 20 18:36:36 2019 -0400

@@ -166,7 +166,7 @@

------

-@CLI_OPTIONS
+@CLI_OPTIONS@

------

diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/repository_dependencies.xml
--- a/tools/ncbi_blast_plus/repository_dependencies.xml Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/repository_dependencies.xml Sat Jul 20 18:36:36 2019 -0400

@@ -1,4 +1,4 @@
-<?xml version="1.0"?>
+<?xml version="1.0" ?>
<repositories description="This requires the BLAST datatype definitions (e.g. the BLAST XML format).">
- <repository changeset_revision="01b38f20197e" name="blast_datatypes" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-</repositories>
+ <repository changeset_revision="01b38f20197e" name="blast_datatypes" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu"/>
+</repositories>
\ No newline at end of file

diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/tool_dependencies.xml
--- a/tools/ncbi_blast_plus/tool_dependencies.xml Tue Oct 23 08:48:19 2018 -0400
+++ b/tools/ncbi_blast_plus/tool_dependencies.xml Sat Jul 20 18:36:36 2019 -0400

@@ -1,6 +1,6 @@
-<?xml version="1.0"?>
+<?xml version="1.0" ?>
<tool_dependency>
-    <package name="blast" version="2.5.0">
-        <repository changeset_revision="5dd2b68c7d04" name="package_blast_plus_2_5_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="blast" version="2.7.1">
+        <repository changeset_revision="2e9109a8924f" name="package_blast_plus_2_7_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu"/>
     </package>
-</tool_dependency>
+</tool_dependency>
\ No newline at end of file