Galaxy |

Changeset 0:e6799e98c5fb (2022-04-05)

Next changeset 1:aea6702a3cd5 (2022-08-09)

Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blast commit 15fc6c06f743bae276ff02dc405e7da61a07bd08"

added:
macros.xml
magicblast.xml
test-data/blastdb.loc
test-data/gilist1.tabular
test-data/phiX174/blastdb.nhd
test-data/phiX174/blastdb.nhi
test-data/phiX174/blastdb.nhr
test-data/phiX174/blastdb.nin
test-data/phiX174/blastdb.nog
test-data/phiX174/blastdb.nsd
test-data/phiX174/blastdb.nsi
test-data/phiX174/blastdb.nsq
test-data/query1.fasta.gz
test-data/query_forward1.fastqsanger.gz
test-data/query_reverse1.fastqsanger.gz
test-data/subject1.fasta.gz
test-data/taxids.tabular
tool-data/blastdb.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test

diff -r 000000000000 -r e6799e98c5fb macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Apr 05 12:11:08 2022 +0000

@@ -0,0 +1,33 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.6.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">20.09</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">magicblast</requirement>
+            <requirement type="package" version="1.15">samtools</requirement>
+        </requirements>
+    </xml>
+    <xml name="output_sort_param">
+        <param name="output_sort" type="select" label="BAM sorting mode" help="The 'Not sorted' option can significantly extend the run time of the tool (it runs using a single thread)">
+            <option value="coordinate" selected="True">Sort by chromosomal coordinates</option>
+            <option value="name">Sort by read names</option>
+            <option value="unsorted">Not sorted (sorted as input)</option>
+        </param>
+    </xml>
+    <xml name="sanitize_query" token_validinitial="string.ascii_letters,string.digits,string.whitespace,string.punctuation">
+        <sanitizer>
+            <valid initial="@VALIDINITIAL@">
+                <remove value="'"/>
+            </valid>
+            <mapping initial="none">
+                <add source="'" target="'&quot;'&quot;'"/>
+            </mapping>
+        </sanitizer>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1186/s12859-019-2996-x</citation>
+        </citations>
+    </xml>
+</macros>

diff -r 000000000000 -r e6799e98c5fb magicblast.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/magicblast.xml Tue Apr 05 12:11:08 2022 +0000

[

b'@@ -0,0 +1,362 @@\n+<tool id="magicblast" name="Magic-BLAST: map large RNA or DNA sequences" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n+ <description>against a whole genome or transcriptome</description>\n+ <macros>\n+ <import>macros.xml</import>\n+ </macros>\n+ <expand macro="requirements"/>\n+ <command detect_errors="exit_code"><![CDATA[\n+#import os\n+\n+magicblast\n+-num_threads \\${GALAXY_SLOTS:-8}\n+#if $query.is_of_type(\'fasta.gz\', \'fastqsanger.gz\'):\n+ -query <(gunzip -c \'${query}\')\n+#else:\n+ -query \'${query}\'\n+#end if\n+#if $query_mate:\n+ -paired\n+ #if $query.is_of_type(\'fasta.gz\', \'fastqsanger.gz\'):\n+ -query_mate <(gunzip -c \'${query}\')\n+ #else:\n+ -query_mate \'${query}\'\n+ #end if\n+#end if\n+\n+#if $query.is_of_type(\'fastqsanger\', \'fastqsanger.gz\'):\n+ -infmt fastq\n+#end if\n+\n+#if $db_opts.db_opts_selector == "histdb":\n+ -db \'${os.path.join($db_opts.histdb.extra_files_path, "blastdb")}\'\n+#elif $db_opts.db_opts_selector == "db":\n+ -db \'${os.path.join($db_opts.database.fields.path, "blastdb")}\'\n+#else:\n+ #if $db_opts.subject.is_of_type(\'fasta.gz\'):\n+ -subject <(gunzip -c \'${$db_opts.subject}\')\n+ #else:\n+ -subject \'${db_opts.subject}\'\n+ #end if\n+#end if\n+\n+## General search options\n+-word_size $general_search.word_size\n+-gapopen $general_search.gapopen\n+-gapextend $general_search.gapextend\n+-penalty $general_search.penalty\n+-max_intron_length $general_search.max_intron_length\n+\n+## Query filtering options\n+$query_filtering.lcase_masking\n+-validate_seqs $query_filtering.validate_seqs\n+-limit_lookup $query_filtering.limit_lookup\n+-max_db_word_count $query_filtering.max_db_word_count\n+-lookup_stride $query_filtering.lookup_stride\n+\n+## Restrict database search\n+#if $restrict_search.gilist:\n+ -gilist \'$restrict_search.gilist\'\n+#end if\n+#if $restrict_search.negative_gilist:\n+ -negative_gilist \'$restrict_search.negative_gilist\'\n+#end if\n+#if 
$restrict_search.seqidlist:\n+ -seqidlist \'$restrict_search.seqidlist\'\n+#end if\n+#if $restrict_search.negative_seqidlist:\n+ -negative_seqidlist \'$restrict_search.negative_seqidlist\'\n+#end if\n+#if str($restrict_search.taxids) != \'\':\n+ --taxids \'$restrict_search.taxids\'\n+#end if\n+#if $restrict_search.taxidlist:\n+ -taxidlist \'$restrict_search.taxidlist\'\n+#end if\n+#if str($restrict_search.negative_taxids) != \'\':\n+ --negative_taxids \'$restrict_search.negative_taxids\'\n+#end if\n+#if $restrict_search.negative_taxidlist:\n+ -negative_taxidlist \'$restrict_search.negative_taxidlist\'\n+#end if\n+\n+## Mapping options\n+-score $mapping.score\n+#if $mapping.max_edit_dist > 0:\n+ -max_edit_dist $mapping.max_edit_dist\n+#end if\n+-splice \'$mapping.splice\'\n+-reftype \'$mapping.reftype\'\n+\n+## Output unaligned options\n+#if str($output_options.report_unaligned_cond.report_unaligned) == \'yes\':\n+ #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.report_unaligned_separately) == \'yes\':\n+ -out_unaligned \'out_unaligned\'\n+ #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt) == \'bam\':\n+ -unaligned_fmt \'sam\'\n+ #else:\n+ -unaligned_fmt \'$output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt\'\n+ #end if\n+ #end if\n+#else:\n+ -no_unaligned\n+#end if\n+\n+## Additional output options\n+$output_options.no_discordant\n+## Switch default SAM output to be BAM.\n+#if str($output_options.outfmt_cond.outfmt) == \'bam\':\n+ $output_options.outfmt_cond.md_tag\n+ #if $query_mate:\n+ $output_options.outfmt_cond.no_query_id_trim\n+ #end if\n+ -out \'output.sam\'\n+ #if str($output_options.outfmt_cond.output_sort) == \'coordinate\':\n+ && samtools sort -@\\${GALAXY_SLOTS:-4} -O bam \'output.sam\' > \'$output\'\n+ #elif str($output_options.outfmt_cond.output_sort) == \'name\':\n+ && samtools sort -n 
-@\\${GALAXY_SLOTS:-4} -O'..b' in error -->\n+ <test expect_failure="true">\n+ <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/>\n+ <param name="db_opts_selector" value="file"/>\n+ <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/>\n+ <param name="report_unaligned_separately" value="yes"/>\n+ <param name="gilist" value="gilist1.tabular" ftype="tabular"/>\n+ <assert_stderr>\n+ <has_text text="Incompatible with argument:"/>\n+ </assert_stderr>\n+ </test>\n+ \n+ <test expect_failure="true">\n+ <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/>\n+ <param name="db_opts_selector" value="db"/>\n+ <param name="database" value="phiX174"/>\n+ <param name="taxidlist" value="taxids.tabular" ftype="tabular"/>\n+ <assert_stderr>\n+ <has_text text="Taxonomy filtering is not supported in v4 BLAST dbs"/>\n+ </assert_stderr>\n+ </test>\n+ \n+ <test expect_num_outputs="1">\n+ <param name="query" value="query_forward1.fastqsanger.gz" ftype="fastqsanger.gz"/>\n+ <param name="query_mate" value="query_reverse1.fastqsanger.gz" ftype="fastqsanger.gz"/>\n+ <param name="db_opts_selector" value="file"/>\n+ <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/>\n+ <output name="output" ftype="bam">\n+ <assert_contents>\n+ <has_size value="62080" delta="50"/>\n+ </assert_contents>\n+ </output>\n+ </test>\n+ \n+ <test expect_num_outputs="1">\n+ <param name="query" value="query_forward1.fastqsanger.gz" ftype="fastqsanger.gz"/>\n+ <param name="query_mate" value="query_reverse1.fastqsanger.gz" ftype="fastqsanger.gz"/>\n+ <param name="db_opts_selector" value="db"/>\n+ <param name="database" value="phiX174"/>\n+ <output name="output" ftype="bam">\n+ <assert_contents>\n+ <has_size value="62079" delta="50"/>\n+ </assert_contents>\n+ </output>\n+ </test>\n+ </tests>\n+ <help><![CDATA[\n+**What it does**\n+\n+.. class:: warningmark\n+\n+In addition to a BLAST database, you can also search against a fasta file of subject (target) sequences. 
However, this is not\n+advised because it is slower (only one CPU is used), but more importantly gives e-values for pairwise searches (very small\n+e-values which will look overly significant). In most cases you should convert the fasta file into a blast database using\n+*makeblastdb* and search against that.\n+\n+Magic-BLAST is a tool for mapping large next-generation RNA or DNA sequencing runs against a whole genome or transcriptome.\n+Each alignment optimizes a composite score, taking into account simultaneously the two reads of a pair, and in case of RNA-seq,\n+locating the candidate introns and adding up the score of all exons. This is very different from other versions of BLAST, where\n+each exon is scored as a separate hit and read-pairing is ignored.\n+\n+Magic-BLAST incorporates within the NCBI BLAST code framework ideas developed in the NCBI Magic pipeline, in particular hit\n+extensions by local walk and jump, and recursive clipping of mismatches near the edges of the reads, which avoids accumulating\n+artefactual mismatches near splice sites and is needed to distinguish short indels from substitutions near the edges.\n+\n+The tool accepts a single or paired set of reads in fasta or fastqsanger format and produces bam or tabular output.\n+\n+More information about Magic-BLAST is available in the\n+`online documentation <https://ncbi.github.io/magicblast/>`_.\n+ ]]></help>\n+ <expand macro="citations"/>\n+</tool>\n'

diff -r 000000000000 -r e6799e98c5fb test-data/blastdb.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastdb.loc Tue Apr 05 12:11:08 2022 +0000

@@ -0,0 +1,4 @@
+# This file is just a placeholder since Galaxy does
+# not yet support uploading a BLAST database, which
+# is required for functional tests.
+phiX174 phiX174 ${__HERE__}/phiX174

diff -r 000000000000 -r e6799e98c5fb test-data/gilist1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gilist1.tabular Tue Apr 05 12:11:08 2022 +0000

@@ -0,0 +1,1 @@
+57163782

diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nhd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX174/blastdb.nhd Tue Apr 05 12:11:08 2022 +0000

@@ -0,0 +1,7 @@
+15495040301
+25770063902
+2660043075
+27122163083
+3427376816
+40885395164
+7819003870

diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nhi

Binary file test-data/phiX174/blastdb.nhi has changed

diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nhr

Binary file test-data/phiX174/blastdb.nhr has changed

diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nin

Binary file test-data/phiX174/blastdb.nin has changed

diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nog

Binary file test-data/phiX174/blastdb.nog has changed

diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nsd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX174/blastdb.nsd Tue Apr 05 12:11:08 2022 +0000

@@ -0,0 +1,7 @@
+gnl|bl_ord_id|00
+gnl|bl_ord_id|11
+gnl|bl_ord_id|22
+gnl|bl_ord_id|33
+gnl|bl_ord_id|44
+gnl|bl_ord_id|55
+gnl|bl_ord_id|66

diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nsi

Binary file test-data/phiX174/blastdb.nsi has changed

diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nsq

Binary file test-data/phiX174/blastdb.nsq has changed

diff -r 000000000000 -r e6799e98c5fb test-data/query1.fasta.gz

Binary file test-data/query1.fasta.gz has changed

diff -r 000000000000 -r e6799e98c5fb test-data/query_forward1.fastqsanger.gz

Binary file test-data/query_forward1.fastqsanger.gz has changed

diff -r 000000000000 -r e6799e98c5fb test-data/query_reverse1.fastqsanger.gz

Binary file test-data/query_reverse1.fastqsanger.gz has changed

diff -r 000000000000 -r e6799e98c5fb test-data/subject1.fasta.gz

Binary file test-data/subject1.fasta.gz has changed

diff -r 000000000000 -r e6799e98c5fb test-data/taxids.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/taxids.tabular Tue Apr 05 12:11:08 2022 +0000

@@ -0,0 +1,1 @@
+57163782

diff -r 000000000000 -r e6799e98c5fb tool-data/blastdb.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/blastdb.loc.sample Tue Apr 05 12:11:08 2022 +0000

@@ -0,0 +1,44 @@
+# This is a sample file distributed with Galaxy that is used to define a
+# list of nucleotide BLAST databases, using three columns tab separated:
+#
+# <unique_id>{tab}<database_caption>{tab}<base_name_path>
+#
+# The captions typically contain spaces and might end with the build date.
+# It is important that the actual database name does not have a space in
+# it, and that there are only two tabs on each line.
+#
+# You can download the NCBI provided nucleotide databases like NT from here:
+# ftp://ftp.ncbi.nlm.nih.gov/blast/db/
+#
+# For simplicity, many Galaxy servers are configured to offer just a live
+# version of each NCBI BLAST database (updated with the NCBI provided
+# Perl scripts or similar). In this case, we recommend using the case
+# sensitive base-name of the NCBI BLAST databases as the unique id.
+# Consistent naming is important for sharing workflows between Galaxy
+# servers.
+#
+# For example, consider the NCBI partially non-redundant nucleotide
+# nt BLAST database, where you have downloaded and decompressed the
+# files under /data/blastdb/ meaning at the command line BLAST+
+# would look at the files /data/blastdb/nt.n* when run with:
+#
+# $ blastn -db /data/blastdb/nt -query ...
+#
+# In this case use nt (lower case to match the NCBI file naming) as the
+# unique id in the first column of blastdb.loc, giving an entry like
+# this:
+#
+# nt{tab}NCBI partially non-redundant (nt){tab}/data/blastdb/nt
+#
+# Alternatively, rather than a "live" mirror of the NCBI databases which
+# are updated automatically, for full reproducibility the Galaxy Team
+# recommend saving date-stamped copies of the databases. In this case
+# your blastdb.loc file should include an entry per line for each
+# version you have stored. For example:
+#
+# nt_05Jun2010{tab}NCBI nt (partially non-redundant) 05 Jun 2010{tab}/data/blastdb/05Jun2010/nt
+# nt_15Aug2010{tab}NCBI nt (partially non-redundant) 15 Aug 2010{tab}/data/blastdb/15Aug2010/nt
+# ...etc...
+#
+# See also blastdb_p.loc which is for any protein BLAST database, and
+# blastdb_d.loc which is for any protein domains databases (like CDD).

diff -r 000000000000 -r e6799e98c5fb tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Apr 05 12:11:08 2022 +0000

@@ -0,0 +1,6 @@
+<tables>
+    <table name="blastdb" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/blastdb.loc"/>
+    </table>
+</tables>

diff -r 000000000000 -r e6799e98c5fb tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Tue Apr 05 12:11:08 2022 +0000

@@ -0,0 +1,6 @@
+<tables>
+    <table name="blastdb" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/blastdb.loc"/>
+    </table>
+</tables>