Repository 'magicblast'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/magicblast

Changeset 0:e6799e98c5fb (2022-04-05)
Next changeset 1:aea6702a3cd5 (2022-08-09)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blast commit 15fc6c06f743bae276ff02dc405e7da61a07bd08"
added:
macros.xml
magicblast.xml
test-data/blastdb.loc
test-data/gilist1.tabular
test-data/phiX174/blastdb.nhd
test-data/phiX174/blastdb.nhi
test-data/phiX174/blastdb.nhr
test-data/phiX174/blastdb.nin
test-data/phiX174/blastdb.nog
test-data/phiX174/blastdb.nsd
test-data/phiX174/blastdb.nsi
test-data/phiX174/blastdb.nsq
test-data/query1.fasta.gz
test-data/query_forward1.fastqsanger.gz
test-data/query_reverse1.fastqsanger.gz
test-data/subject1.fasta.gz
test-data/taxids.tabular
tool-data/blastdb.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r e6799e98c5fb macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Apr 05 12:11:08 2022 +0000
b
@@ -0,0 +1,33 @@
+<macros> <!-- Shared tokens and XML macros imported by magicblast.xml. -->
+    <token name="@TOOL_VERSION@">1.6.0</token> <!-- Used in the tool version attribute and as the magicblast package version below. -->
+    <token name="@VERSION_SUFFIX@">0</token> <!-- Follows "+galaxy" in the tool version attribute. -->
+    <token name="@PROFILE@">20.09</token> <!-- Substituted into the tool's profile attribute. -->
+    <xml name="requirements"> <!-- Package dependencies; expanded near the top of magicblast.xml. -->
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">magicblast</requirement>
+            <requirement type="package" version="1.15">samtools</requirement> <!-- The tool command runs "samtools sort" on the SAM output when BAM is requested. -->
+        </requirements>
+    </xml>
+    <xml name="output_sort_param"> <!-- Select with three BAM sort modes; the tool command checks output_sort for 'coordinate' and 'name' to pick the samtools sort call. -->
+        <param name="output_sort" type="select" label="BAM sorting mode" help="The 'Not sorted' option can significantly extend the run time of the tool (it runs using a single thread)">
+            <option value="coordinate" selected="True">Sort by chromosomal coordinates</option>
+            <option value="name">Sort by read names</option>
+            <option value="unsorted">Not sorted (sorted as input)</option>
+        </param>
+    </xml>
+    <xml name="sanitize_query" token_validinitial="string.ascii_letters,string.digits,string.whitespace,string.punctuation"> <!-- token_validinitial supplies the value for @VALIDINITIAL@ below. NOTE(review): presumably overridable per expand; confirm against the Galaxy macro docs. -->
+        <sanitizer>
+            <valid initial="@VALIDINITIAL@">
+                <remove value="&apos;"/> <!-- The single quote is taken out of the valid character set... -->
+            </valid>
+            <mapping initial="none">
+                <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;"/> <!-- ...and mapped to '"'"' instead. NOTE(review): looks like the shell idiom for a literal quote inside a single-quoted argument; confirm. -->
+            </mapping>
+        </sanitizer>
+    </xml>
+    <xml name="citations"> <!-- Single DOI citation; expanded at the end of magicblast.xml. -->
+        <citations>
+            <citation type="doi">10.1186/s12859-019-2996-x</citation>
+        </citations>
+    </xml>
+</macros>
b
diff -r 000000000000 -r e6799e98c5fb magicblast.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/magicblast.xml Tue Apr 05 12:11:08 2022 +0000
[
b'@@ -0,0 +1,362 @@\n+<tool id="magicblast" name="Magic-BLAST: map large RNA or DNA sequences" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n+    <description>against a whole genome or transcriptome</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro="requirements"/>\n+    <command detect_errors="exit_code"><![CDATA[\n+#import os\n+\n+magicblast\n+-num_threads \\${GALAXY_SLOTS:-8}\n+#if $query.is_of_type(\'fasta.gz\', \'fastqsanger.gz\'):\n+    -query <(gunzip -c \'${query}\')\n+#else:\n+    -query \'${query}\'\n+#end if\n+#if $query_mate:\n+    -paired\n+    #if $query.is_of_type(\'fasta.gz\', \'fastqsanger.gz\'):\n+        -query_mate <(gunzip -c \'${query}\')\n+    #else:\n+        -query_mate \'${query}\'\n+    #end if\n+#end if\n+\n+#if $query.is_of_type(\'fastqsanger\', \'fastqsanger.gz\'):\n+    -infmt fastq\n+#end if\n+\n+#if $db_opts.db_opts_selector == "histdb":\n+    -db \'${os.path.join($db_opts.histdb.extra_files_path, "blastdb")}\'\n+#elif $db_opts.db_opts_selector == "db":\n+    -db \'${os.path.join($db_opts.database.fields.path, "blastdb")}\'\n+#else:\n+    #if $db_opts.subject.is_of_type(\'fasta.gz\'):\n+        -subject <(gunzip -c \'${$db_opts.subject}\')\n+    #else:\n+        -subject \'${db_opts.subject}\'\n+    #end if\n+#end if\n+\n+## General search options\n+-word_size $general_search.word_size\n+-gapopen $general_search.gapopen\n+-gapextend $general_search.gapextend\n+-penalty $general_search.penalty\n+-max_intron_length $general_search.max_intron_length\n+\n+## Query filtering options\n+$query_filtering.lcase_masking\n+-validate_seqs $query_filtering.validate_seqs\n+-limit_lookup $query_filtering.limit_lookup\n+-max_db_word_count $query_filtering.max_db_word_count\n+-lookup_stride $query_filtering.lookup_stride\n+\n+## Restrict database search\n+#if $restrict_search.gilist:\n+    -gilist \'$restrict_search.gilist\'\n+#end if\n+#if 
$restrict_search.negative_gilist:\n+    -negative_gilist \'$restrict_search.negative_gilist\'\n+#end if\n+#if $restrict_search.seqidlist:\n+    -seqidlist \'$restrict_search.seqidlist\'\n+#end if\n+#if $restrict_search.negative_seqidlist:\n+    -negative_seqidlist \'$restrict_search.negative_seqidlist\'\n+#end if\n+#if str($restrict_search.taxids) != \'\':\n+    --taxids \'$restrict_search.taxids\'\n+#end if\n+#if $restrict_search.taxidlist:\n+    -taxidlist \'$restrict_search.taxidlist\'\n+#end if\n+#if str($restrict_search.negative_taxids) != \'\':\n+    --negative_taxids \'$restrict_search.negative_taxids\'\n+#end if\n+#if $restrict_search.negative_taxidlist:\n+    -negative_taxidlist \'$restrict_search.negative_taxidlist\'\n+#end if\n+\n+## Mapping options\n+-score $mapping.score\n+#if $mapping.max_edit_dist > 0:\n+    -max_edit_dist $mapping.max_edit_dist\n+#end if\n+-splice \'$mapping.splice\'\n+-reftype \'$mapping.reftype\'\n+\n+## Output unaligned options\n+#if str($output_options.report_unaligned_cond.report_unaligned) == \'yes\':\n+    #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.report_unaligned_separately) == \'yes\':\n+        -out_unaligned \'out_unaligned\'\n+        #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt) == \'bam\':\n+            -unaligned_fmt \'sam\'\n+        #else:\n+            -unaligned_fmt \'$output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt\'\n+        #end if\n+    #end if\n+#else:\n+    -no_unaligned\n+#end if\n+\n+## Additional output options\n+$output_options.no_discordant\n+## Switch default SAM output to be BAM.\n+#if str($output_options.outfmt_cond.outfmt) == \'bam\':\n+    $output_options.outfmt_cond.md_tag\n+    #if $query_mate:\n+        $output_options.outfmt_cond.no_query_id_trim\n+    #end if\n+    -out \'output.sam\'\n+    #if 
str($output_options.outfmt_cond.output_sort) == \'coordinate\':\n+        && samtools sort -@\\${GALAXY_SLOTS:-4} -O bam \'output.sam\' > \'$output\'\n+    #elif str($output_options.outfmt_cond.output_sort) == \'name\':\n+        && samtools sort -n -@\\${GALAXY_SLOTS:-4} -O'..b' in error -->\n+        <test expect_failure="true">\n+            <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/>\n+            <param name="db_opts_selector" value="file"/>\n+            <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/>\n+            <param name="report_unaligned_separately" value="yes"/>\n+            <param name="gilist" value="gilist1.tabular" ftype="tabular"/>\n+            <assert_stderr>\n+                <has_text text="Incompatible with argument:"/>\n+            </assert_stderr>\n+        </test>\n+        <!-- Single fasta.gz input, cached db, taxidlist, results in error -->\n+        <test expect_failure="true">\n+            <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/>\n+            <param name="db_opts_selector" value="db"/>\n+            <param name="database" value="phiX174"/>\n+            <param name="taxidlist" value="taxids.tabular" ftype="tabular"/>\n+            <assert_stderr>\n+                <has_text text="Taxonomy filtering is not supported in v4 BLAST dbs"/>\n+            </assert_stderr>\n+        </test>\n+        <!-- Paired fastqsanger.gz input, subject file -->\n+        <test expect_num_outputs="1">\n+            <param name="query" value="query_forward1.fastqsanger.gz" ftype="fastqsanger.gz"/>\n+            <param name="query_mate" value="query_reverse1.fastqsanger.gz" ftype="fastqsanger.gz"/>\n+            <param name="db_opts_selector" value="file"/>\n+            <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/>\n+            <output name="output" ftype="bam">\n+                <assert_contents>\n+                    <has_size value="62080" delta="50"/>\n+              
  </assert_contents>\n+            </output>\n+        </test>\n+        <!-- Paired fastqsanger.gz input, cached blast db -->\n+        <test expect_num_outputs="1">\n+            <param name="query" value="query_forward1.fastqsanger.gz" ftype="fastqsanger.gz"/>\n+            <param name="query_mate" value="query_reverse1.fastqsanger.gz" ftype="fastqsanger.gz"/>\n+            <param name="db_opts_selector" value="db"/>\n+            <param name="database" value="phiX174"/>\n+            <output name="output" ftype="bam">\n+                <assert_contents>\n+                    <has_size value="62079" delta="50"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+**What it does**\n+\n+.. class:: warningmark\n+\n+In addition to a BLAST database, you can also search against a fasta file of subject (target) sequences. However, this is not\n+advised because it is slower (only one CPU is used), but more importantly gives e-values for pairwise searches (very small\n+e-values which will look overly signficiant).  In most cases you should convert the fasta file into a blast database using\n+*makeblastdb* and search against that.\n+\n+Magic-BLAST is a tool for mapping large next-generation RNA or DNA sequencing runs against a whole genome or transcriptome.\n+Each alignment optimizes a composite score, taking into account simultaneously the two reads of a pair, and in case of RNA-seq,\n+locating the candidate introns and adding up the score of all exons. 
This is very different from other versions of BLAST, where\n+each exon is scored as a separate hit and read-pairing is ignored.\n+\n+Magic-BLAST incorporates within the NCBI BLAST code framework ideas developed in the NCBI Magic pipeline, in particular hit\n+extensions by local walk and jump, and recursive clipping of mismatches near the edges of the reads, which avoids accumulating\n+artefactual mismatches near splice sites and is needed to distinguish short indels from substitutions near the edges.\n+\n+The tool accepts a single or paired set of reads in fasta or fastqsanger format and produces bam or tabular output.\n+\n+More information about Magic-BLAST is available in the\n+`online documentation <https://ncbi.github.io/magicblast/>`_.\n+    ]]></help>\n+    <expand macro="citations"/>\n+</tool>\n'
b
diff -r 000000000000 -r e6799e98c5fb test-data/blastdb.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blastdb.loc Tue Apr 05 12:11:08 2022 +0000
b
@@ -0,0 +1,4 @@
+# This file is just a placeholder since Galaxy does
+# not yet support uploading a BLAST database, which
+# is required for functional tests.
+phiX174 phiX174 ${__HERE__}/phiX174
b
diff -r 000000000000 -r e6799e98c5fb test-data/gilist1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gilist1.tabular Tue Apr 05 12:11:08 2022 +0000
b
@@ -0,0 +1,1 @@
+57163782
b
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nhd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX174/blastdb.nhd Tue Apr 05 12:11:08 2022 +0000
b
@@ -0,0 +1,7 @@
+15495040301
+25770063902
+2660043075
+27122163083
+3427376816
+40885395164
+7819003870
b
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nhi
b
Binary file test-data/phiX174/blastdb.nhi has changed
b
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nhr
b
Binary file test-data/phiX174/blastdb.nhr has changed
b
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nin
b
Binary file test-data/phiX174/blastdb.nin has changed
b
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nog
b
Binary file test-data/phiX174/blastdb.nog has changed
b
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nsd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX174/blastdb.nsd Tue Apr 05 12:11:08 2022 +0000
b
@@ -0,0 +1,7 @@
+gnl|bl_ord_id|00
+gnl|bl_ord_id|11
+gnl|bl_ord_id|22
+gnl|bl_ord_id|33
+gnl|bl_ord_id|44
+gnl|bl_ord_id|55
+gnl|bl_ord_id|66
b
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nsi
b
Binary file test-data/phiX174/blastdb.nsi has changed
b
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nsq
b
Binary file test-data/phiX174/blastdb.nsq has changed
b
diff -r 000000000000 -r e6799e98c5fb test-data/query1.fasta.gz
b
Binary file test-data/query1.fasta.gz has changed
b
diff -r 000000000000 -r e6799e98c5fb test-data/query_forward1.fastqsanger.gz
b
Binary file test-data/query_forward1.fastqsanger.gz has changed
b
diff -r 000000000000 -r e6799e98c5fb test-data/query_reverse1.fastqsanger.gz
b
Binary file test-data/query_reverse1.fastqsanger.gz has changed
b
diff -r 000000000000 -r e6799e98c5fb test-data/subject1.fasta.gz
b
Binary file test-data/subject1.fasta.gz has changed
b
diff -r 000000000000 -r e6799e98c5fb test-data/taxids.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/taxids.tabular Tue Apr 05 12:11:08 2022 +0000
b
@@ -0,0 +1,1 @@
+57163782
b
diff -r 000000000000 -r e6799e98c5fb tool-data/blastdb.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/blastdb.loc.sample Tue Apr 05 12:11:08 2022 +0000
b
@@ -0,0 +1,44 @@
+# This is a sample file distributed with Galaxy that is used to define a
+# list of nucleotide BLAST databases, using three columns tab separated:
+#
+# <unique_id>{tab}<database_caption>{tab}<base_name_path>
+#
+# The captions typically contain spaces and might end with the build date.
+# It is important that the actual database name does not have a space in
+# it, and that there are only two tabs on each line.
+#
+# You can download the NCBI provided nucleotide databases like NT from here:
+# ftp://ftp.ncbi.nlm.nih.gov/blast/db/
+#
+# For simplicity, many Galaxy servers are configured to offer just a live
+# version of each NCBI BLAST database (updated with the NCBI provided
+# Perl scripts or similar). In this case, we recommend using the case
+# sensitive base-name of the NCBI BLAST databases as the unique id.
+# Consistent naming is important for sharing workflows between Galaxy
+# servers.
+#
+# For example, consider the NCBI partially non-redundant nucleotide 
+# nt BLAST database, where you have downloaded and decompressed the
+# files under /data/blastdb/ meaning at the command line BLAST+ would
+# look at the files /data/blastdb/nt.n* when run with:
+#
+# $ blastn -db /data/blastdb/nt -query ...
+#
+# In this case use nt (lower case to match the NCBI file naming) as the
+# unique id in the first column of blastdb.loc, giving an entry like
+# this:
+#
+# nt{tab}NCBI partially non-redundant (nt){tab}/data/blastdb/nt
+#
+# Alternatively, rather than a "live" mirror of the NCBI databases which
+# are updated automatically, for full reproducibility the Galaxy Team
+# recommend saving date-stamped copies of the databases. In this case
+# your blastdb.loc file should include an entry per line for each
+# version you have stored. For example:
+#
+# nt_05Jun2010{tab}NCBI nt (partially non-redundant) 05 Jun 2010{tab}/data/blastdb/05Jun2010/nt
+# nt_15Aug2010{tab}NCBI nt (partially non-redundant) 15 Aug 2010{tab}/data/blastdb/15Aug2010/nt
+# ...etc...
+#
+# See also blastdb_p.loc which is for any protein BLAST database, and
+# blastdb_d.loc which is for any protein domains databases (like CDD).
b
diff -r 000000000000 -r e6799e98c5fb tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Apr 05 12:11:08 2022 +0000
b
@@ -0,0 +1,6 @@
+<tables> <!-- Sample data table configuration for the magicblast tool. -->
+    <table name="blastdb" comment_char="#" allow_duplicate_entries="False"> <!-- Table "blastdb"; "#" is declared as the comment character for the .loc file. -->
+        <columns>value, name, path</columns> <!-- The tool command reads the path column via database.fields.path. -->
+        <file path="tool-data/blastdb.loc"/> <!-- Entries are loaded from this .loc file. -->
+    </table>
+</tables>
b
diff -r 000000000000 -r e6799e98c5fb tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Tue Apr 05 12:11:08 2022 +0000
b
@@ -0,0 +1,6 @@
+<tables> <!-- Data table configuration used for the tool's functional tests. -->
+    <table name="blastdb" comment_char="#" allow_duplicate_entries="False"> <!-- Same table name and columns as the .sample configuration. -->
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/blastdb.loc"/> <!-- Points at the test .loc file. NOTE(review): ${__HERE__} presumably expands to this file's directory; confirm. -->
+    </table>
+</tables>