| Previous changeset 25:e25d3acf6e68 (2018-10-23) Next changeset 27:6f8ea4b9a2c4 (2020-09-09) |
|
Commit message:
v0.3.3 - fixed legacy dependency definition |
|
modified:
test-data/cd00003_and_cd00008.pin tools/ncbi_blast_plus/README.rst tools/ncbi_blast_plus/blastxml_to_tabular.py tools/ncbi_blast_plus/check_no_duplicates.py tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml tools/ncbi_blast_plus/ncbi_macros.xml tools/ncbi_blast_plus/ncbi_makeprofiledb.xml tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml tools/ncbi_blast_plus/repository_dependencies.xml tools/ncbi_blast_plus/tool_dependencies.xml |
|
added:
test-data/rhodopsin_nucs.blastdbcmd.txt |
| b |
| diff -r e25d3acf6e68 -r 2889433c7ae1 test-data/cd00003_and_cd00008.pin |
| b |
| Binary file test-data/cd00003_and_cd00008.pin has changed |
| b |
| diff -r e25d3acf6e68 -r 2889433c7ae1 test-data/rhodopsin_nucs.blastdbcmd.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rhodopsin_nucs.blastdbcmd.txt Sat Jul 20 18:36:36 2019 -0400 |
| b |
| @@ -0,0 +1,7 @@ +gi|57163782|ref|NM_001009242.1| +gi|2734705|gb|U59921.1|BBU59921 "1 -" + +gi|283855845|gb|GQ290303.1| 1-4301 + +gi|283855822|gb|GQ290312.1| "1-983" +gi|18148870|dbj|AB062417.1| "1 -" + +gi|12583664|dbj|AB043817.1| "1--" + |
| b |
| diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/README.rst --- a/tools/ncbi_blast_plus/README.rst Tue Oct 23 08:48:19 2018 -0400 +++ b/tools/ncbi_blast_plus/README.rst Sat Jul 20 18:36:36 2019 -0400 |
| b |
| @@ -264,6 +264,10 @@ output format it must be mapped to different command line arguments. - Extend gzipped query support to all the command line tools. - Workaround for gzipped support under Galaxy release 16.01 or older. +v0.3.2 - Fixed incomplete ``@CLI_OPTIONS@`` macro in the help text for the + ``tblastn`` and ``blastdbcmd`` wrappers. +v0.3.3 - Fixed ``tool_dependencies.xml`` to use BLAST+ 2.7.1 (useful only for + older Galaxy instances - we recommend conda for dependencies now). ======= ====================================================================== |
| b |
| diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/blastxml_to_tabular.py --- a/tools/ncbi_blast_plus/blastxml_to_tabular.py Tue Oct 23 08:48:19 2018 -0400 +++ b/tools/ncbi_blast_plus/blastxml_to_tabular.py Sat Jul 20 18:36:36 2019 -0400 |
| [ |
| b'@@ -81,12 +81,14 @@\n else:\n from galaxy import eggs # noqa - ignore flake8 F401\n import pkg_resources\n+\n pkg_resources.require("elementtree")\n from elementtree import ElementTree\n \n if len(sys.argv) == 4 and sys.argv[3] in ["std", "x22", "ext"]:\n # False positive if user really has a BLAST XML file called \'std\' or \'ext\'...\n- sys.exit("""ERROR: The script API has changed, sorry.\n+ sys.exit(\n+ """ERROR: The script API has changed, sorry.\n \n Instead of the old style:\n \n@@ -99,7 +101,8 @@\n For more information, use:\n \n $ python blastxml_to_tabular.py -h\n-""")\n+"""\n+ )\n \n usage = """usage: %prog [options] blastxml[,...]\n \n@@ -113,16 +116,29 @@\n extended column names are supported.\n """\n parser = OptionParser(usage=usage)\n-parser.add_option(\'-o\', \'--output\', dest=\'output\', default=None,\n- help=\'output filename (defaults to stdout)\',\n- metavar="FILE")\n-parser.add_option("-c", "--columns", dest="columns", default=\'std\',\n- help="[std|ext|col1,col2,...] standard 12 columns, extended 25 columns, or list of column names")\n+parser.add_option(\n+ "-o",\n+ "--output",\n+ dest="output",\n+ default=None,\n+ help="output filename (defaults to stdout)",\n+ metavar="FILE",\n+)\n+parser.add_option(\n+ "-c",\n+ "--columns",\n+ dest="columns",\n+ default="std",\n+ help="[std|ext|col1,col2,...] 
standard 12 columns, "\n+ "extended 25 columns, or list of column names",\n+)\n (options, args) = parser.parse_args()\n \n-colnames = (\'qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,\'\n- \'sstart,send,evalue,bitscore,sallseqid,score,nident,positive,\'\n- \'gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles\').split(\',\')\n+colnames = (\n+ "qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,"\n+ "sstart,send,evalue,bitscore,sallseqid,score,nident,positive,"\n+ "gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles"\n+).split(",")\n \n if len(args) < 1:\n sys.exit("ERROR: No BLASTXML input files given; run with --help to see options.")\n@@ -148,7 +164,9 @@\n assert set(colnames).issuperset(cols), cols\n if not cols:\n sys.exit("No columns selected!")\n- extended = max(colnames.index(c) for c in cols) >= 12 # Do we need any higher columns?\n+ extended = (\n+ max(colnames.index(c) for c in cols) >= 12\n+ ) # Do we need any higher columns?\n del out_fmt\n \n for in_file in args:\n@@ -156,15 +174,15 @@\n sys.exit("Input BLAST XML file not found: %s" % in_file)\n \n \n-re_default_query_id = re.compile("^Query_\\d+$")\n-assert re_default_query_id.match("Query_101")\n-assert not re_default_query_id.match("Query_101a")\n-assert not re_default_query_id.match("MyQuery_101")\n-re_default_subject_id = re.compile("^Subject_\\d+$")\n-assert re_default_subject_id.match("Subject_1")\n-assert not re_default_subject_id.match("Subject_")\n-assert not re_default_subject_id.match("Subject_12a")\n-assert not re_default_subject_id.match("TheSubject_1")\n+re_default_query_id = re.compile(r"^Query_\\d+$")\n+assert re_default_query_id.match(r"Query_101")\n+assert not re_default_query_id.match(r"Query_101a")\n+assert not re_default_query_id.match(r"MyQuery_101")\n+re_default_subject_id = re.compile(r"^Subject_\\d+$")\n+assert re_default_subject_id.match(r"Subject_1")\n+assert not re_default_subject_id.match(r"Subject_")\n+assert not 
re_default_subject_id.match(r"Subject_12a")\n+assert not re_default_subject_id.match(r"TheSubject_1")\n \n \n def convert(blastxml_filename, output_handle):\n@@ -213,7 +231,8 @@\n # <Hit_accession>P56514</Hit_accession>\n # or,\n # <Hit_id>Subject_1</Hit_id>\n- # <Hit_def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</Hit_def>\n+ # <Hit_def>gi|57163783|ref|NP_001009242.1|\n+ # rhodopsin [Felis catus]</Hit_def>\n # <Hit_accession>Subject_1</Hit_accession>\n #\n # apparently '..b'dtext("Hsp_hit-from"), # sstart,\n+ hsp.findtext("Hsp_hit-to"), # send,\n+ evalue, # hsp.findtext("Hsp_evalue") in scientific notation\n+ bitscore, # hsp.findtext("Hsp_bit-score") rounded\n+ ]\n \n if extended:\n try:\n- sallseqid = ";".join(name.split(None, 1)[0] for name in hit_def.split(" >"))\n- salltitles = "<>".join(name.split(None, 1)[1] for name in hit_def.split(" >"))\n+ sallseqid = ";".join(\n+ name.split(None, 1)[0] for name in hit_def.split(" >")\n+ )\n+ salltitles = "<>".join(\n+ name.split(None, 1)[1] for name in hit_def.split(" >")\n+ )\n except IndexError as e:\n- sys.exit("Problem splitting multuple hits?\\n%r\\n--> %s" % (hit_def, e))\n+ sys.exit(\n+ "Problem splitting multuple hits?\\n%r\\n--> %s"\n+ % (hit_def, e)\n+ )\n # print(hit_def, "-->", sallseqid)\n positive = hsp.findtext("Hsp_positive")\n ppos = "%0.2f" % (100 * float(positive) / float(length))\n qframe = hsp.findtext("Hsp_query-frame")\n sframe = hsp.findtext("Hsp_hit-frame")\n if blast_program == "blastp":\n- # Probably a bug in BLASTP that they use 0 or 1 depending on format\n+ # Probably a bug in BLASTP that they use 0 or 1\n+ # depending on format\n if qframe == "0":\n qframe = "1"\n if sframe == "0":\n sframe = "1"\n slen = int(hit.findtext("Hit_len"))\n- values.extend([sallseqid,\n- hsp.findtext("Hsp_score"), # score,\n- nident,\n- positive,\n- hsp.findtext("Hsp_gaps"), # gaps,\n- ppos,\n- qframe,\n- sframe,\n- # NOTE - for blastp, XML shows original seq, tabular uses XXX masking\n- q_seq,\n- 
h_seq,\n- str(qlen),\n- str(slen),\n- salltitles,\n- ])\n+ values.extend(\n+ [\n+ sallseqid,\n+ hsp.findtext("Hsp_score"), # score,\n+ nident,\n+ positive,\n+ hsp.findtext("Hsp_gaps"), # gaps,\n+ ppos,\n+ qframe,\n+ sframe,\n+ # NOTE - for blastp, XML shows original seq,\n+ # tabular uses XXX masking\n+ q_seq,\n+ h_seq,\n+ str(qlen),\n+ str(slen),\n+ salltitles,\n+ ]\n+ )\n if cols:\n # Only a subset of the columns are needed\n values = [values[colnames.index(c)] for c in cols]\n' |
| b |
| diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/check_no_duplicates.py --- a/tools/ncbi_blast_plus/check_no_duplicates.py Tue Oct 23 08:48:19 2018 -0400 +++ b/tools/ncbi_blast_plus/check_no_duplicates.py Sat Jul 20 18:36:36 2019 -0400 |
| [ |
| @@ -31,7 +31,7 @@ if not magic: # Empty file, special case continue - elif magic == b'\x1f\x8b': + elif magic == b"\x1f\x8b": # Gzipped handle = gzip.open(filename, "rt") elif magic[0:1] == b">": |
| b |
| diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Tue Oct 23 08:48:19 2018 -0400 +++ b/tools/ncbi_blast_plus/ncbi_blastdbcmd_wrapper.xml Sat Jul 20 18:36:36 2019 -0400 |
| b |
| @@ -8,7 +8,7 @@ <command detect_errors="aggressive" strict="true"> ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces -blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path}" +blastdbcmd -dbtype $db_opts.db_type -db "${db_opts.database.fields.path.replace(',',' ')}" ##TODO: What about -ctrl_a and -target_only as advanced options? @@ -55,7 +55,7 @@ <option value="prompt">User entered</option> </param> <when value="file"> - <param name="entries" argument="-entry_batch" type="data" format="txt,tabular" label="Sequence identifier(s)" help="Plain text file with one ID per line (i.e. single column tabular file)"/> + <param name="entries" argument="-entry_batch" type="data" format="txt,tabular" label="Sequence identifier(s)" help="Plain text file with one ID per line, optionally with space separated range, strand, and algorithm."/> </when> <when value="prompt"> <param name="entries" argument="-entry" type="text" optional="false" area="true" size="10x30" label="Sequence identifier(s)" help="Comma or new line separated list"/> @@ -88,6 +88,15 @@ <output name="seq" file="rhodopsin_nucs.no_gi.fasta" ftype="fasta" /> </test> <test> + <!-- This uses various start end frame combinations but all recover full sequence --> + <param name="db_opts|db_type" value="nucl" /> + <param name="db_opts|database" value="rhodopsin_nucs" /> + <param name="id_opts|id_type" value="file" /> + <param name="id_opts|entries" value="rhodopsin_nucs.blastdbcmd.txt" ftype="txt" /> + <param name="outfmt" value="original" /> + <output name="seq" file="rhodopsin_nucs.no_gi.fasta" ftype="fasta" /> + </test> + <test> <param name="db_opts|db_type" value="nucl" /> <param name="db_opts|database" value="rhodopsin_nucs" /> <param name="id_opts|id_type" value="prompt" /> @@ -111,6 +120,12 @@ Extracts FASTA formatted sequences from a BLAST database using the NCBI BLAST+ blastdbcmd command line 
tool. +When giving a text file of entries, use one line per sequence. +Optional values should be space separated - the simplest syntax +is ``identifier start-end`` (where ``end`` can be just ``-``), +or ``identifier start-end strand`` (where the strand is given as +either ``+`` or ``-``). + .. class:: warningmark **BLAST assigned identifiers** @@ -131,7 +146,7 @@ ------- -@CLI_OPTIONS +@CLI_OPTIONS@ ------- |
| b |
| diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/ncbi_macros.xml --- a/tools/ncbi_blast_plus/ncbi_macros.xml Tue Oct 23 08:48:19 2018 -0400 +++ b/tools/ncbi_blast_plus/ncbi_macros.xml Sat Jul 20 18:36:36 2019 -0400 |
| b |
| @@ -1,5 +1,5 @@ <macros> - <token name="@WRAPPER_VERSION@">0.3.1</token> + <token name="@WRAPPER_VERSION@">0.3.3</token> <xml name="parallelism"> <!-- If job splitting is enabled, break up the query file into parts --> <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" /> |
| b |
| diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/ncbi_makeprofiledb.xml --- a/tools/ncbi_blast_plus/ncbi_makeprofiledb.xml Tue Oct 23 08:48:19 2018 -0400 +++ b/tools/ncbi_blast_plus/ncbi_makeprofiledb.xml Sat Jul 20 18:36:36 2019 -0400 |
| b |
| @@ -90,7 +90,7 @@ <param name="contain_pssm_scores_type" value="yes" /> <output name="outfile" file="empty_file.dat" ftype="blastdbd" > <extra_files type="file" value="cd00003_and_cd00008.phr" name="blastdb.phr" /> - <extra_files type="file" value="cd00003_and_cd00008.pin" name="blastdb.pin" compare="sim_size" delta="0" /> + <extra_files type="file" value="cd00003_and_cd00008.pin" name="blastdb.pin" compare="sim_size" delta="8" /> <extra_files type="file" value="cd00003_and_cd00008.psq" name="blastdb.psq" /> <extra_files type="file" value="cd00003_and_cd00008.freq" name="blastdb.freq" /> <extra_files type="file" value="cd00003_and_cd00008.loo" name="blastdb.loo" /> |
| b |
| diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml --- a/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Tue Oct 23 08:48:19 2018 -0400 +++ b/tools/ncbi_blast_plus/ncbi_tblastn_wrapper.xml Sat Jul 20 18:36:36 2019 -0400 |
| b |
| @@ -166,7 +166,7 @@ ------ -@CLI_OPTIONS +@CLI_OPTIONS@ ------ |
| b |
| diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/repository_dependencies.xml --- a/tools/ncbi_blast_plus/repository_dependencies.xml Tue Oct 23 08:48:19 2018 -0400 +++ b/tools/ncbi_blast_plus/repository_dependencies.xml Sat Jul 20 18:36:36 2019 -0400 |
| b |
| @@ -1,4 +1,4 @@ -<?xml version="1.0"?> +<?xml version="1.0" ?> <repositories description="This requires the BLAST datatype definitions (e.g. the BLAST XML format)."> - <repository changeset_revision="01b38f20197e" name="blast_datatypes" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> -</repositories> + <repository changeset_revision="01b38f20197e" name="blast_datatypes" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu"/> +</repositories> \ No newline at end of file |
| b |
| diff -r e25d3acf6e68 -r 2889433c7ae1 tools/ncbi_blast_plus/tool_dependencies.xml --- a/tools/ncbi_blast_plus/tool_dependencies.xml Tue Oct 23 08:48:19 2018 -0400 +++ b/tools/ncbi_blast_plus/tool_dependencies.xml Sat Jul 20 18:36:36 2019 -0400 |
| b |
| @@ -1,6 +1,6 @@ -<?xml version="1.0"?> +<?xml version="1.0" ?> <tool_dependency> - <package name="blast" version="2.5.0"> - <repository changeset_revision="5dd2b68c7d04" name="package_blast_plus_2_5_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + <package name="blast" version="2.7.1"> + <repository changeset_revision="2e9109a8924f" name="package_blast_plus_2_7_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu"/> </package> -</tool_dependency> +</tool_dependency> \ No newline at end of file |