Next changeset 1:aea6702a3cd5 (2022-08-09) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blast commit 15fc6c06f743bae276ff02dc405e7da61a07bd08" |
added:
macros.xml magicblast.xml test-data/blastdb.loc test-data/gilist1.tabular test-data/phiX174/blastdb.nhd test-data/phiX174/blastdb.nhi test-data/phiX174/blastdb.nhr test-data/phiX174/blastdb.nin test-data/phiX174/blastdb.nog test-data/phiX174/blastdb.nsd test-data/phiX174/blastdb.nsi test-data/phiX174/blastdb.nsq test-data/query1.fasta.gz test-data/query_forward1.fastqsanger.gz test-data/query_reverse1.fastqsanger.gz test-data/subject1.fasta.gz test-data/taxids.tabular tool-data/blastdb.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
b |
diff -r 000000000000 -r e6799e98c5fb macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue Apr 05 12:11:08 2022 +0000 |
b |
@@ -0,0 +1,33 @@ +<macros> + <token name="@TOOL_VERSION@">1.6.0</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">20.09</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">magicblast</requirement> + <requirement type="package" version="1.15">samtools</requirement> + </requirements> + </xml> + <xml name="output_sort_param"> + <param name="output_sort" type="select" label="BAM sorting mode" help="The 'Not sorted' option can significantly extend the run time of the tool (it runs using a single thread)"> + <option value="coordinate" selected="True">Sort by chromosomal coordinates</option> + <option value="name">Sort by read names</option> + <option value="unsorted">Not sorted (sorted as input)</option> + </param> + </xml> + <xml name="sanitize_query" token_validinitial="string.ascii_letters,string.digits,string.whitespace,string.punctuation"> + <sanitizer> + <valid initial="@VALIDINITIAL@"> + <remove value="'"/> + </valid> + <mapping initial="none"> + <add source="'" target="'"'"'"/> + </mapping> + </sanitizer> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1186/s12859-019-2996-x</citation> + </citations> + </xml> +</macros> |
b |
diff -r 000000000000 -r e6799e98c5fb magicblast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/magicblast.xml Tue Apr 05 12:11:08 2022 +0000 |
[ |
b'@@ -0,0 +1,362 @@\n+<tool id="magicblast" name="Magic-BLAST: map large RNA or DNA sequences" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n+ <description>against a whole genome or transcriptome</description>\n+ <macros>\n+ <import>macros.xml</import>\n+ </macros>\n+ <expand macro="requirements"/>\n+ <command detect_errors="exit_code"><![CDATA[\n+#import os\n+\n+magicblast\n+-num_threads \\${GALAXY_SLOTS:-8}\n+#if $query.is_of_type(\'fasta.gz\', \'fastqsanger.gz\'):\n+ -query <(gunzip -c \'${query}\')\n+#else:\n+ -query \'${query}\'\n+#end if\n+#if $query_mate:\n+ -paired\n+ #if $query.is_of_type(\'fasta.gz\', \'fastqsanger.gz\'):\n+ -query_mate <(gunzip -c \'${query}\')\n+ #else:\n+ -query_mate \'${query}\'\n+ #end if\n+#end if\n+\n+#if $query.is_of_type(\'fastqsanger\', \'fastqsanger.gz\'):\n+ -infmt fastq\n+#end if\n+\n+#if $db_opts.db_opts_selector == "histdb":\n+ -db \'${os.path.join($db_opts.histdb.extra_files_path, "blastdb")}\'\n+#elif $db_opts.db_opts_selector == "db":\n+ -db \'${os.path.join($db_opts.database.fields.path, "blastdb")}\'\n+#else:\n+ #if $db_opts.subject.is_of_type(\'fasta.gz\'):\n+ -subject <(gunzip -c \'${$db_opts.subject}\')\n+ #else:\n+ -subject \'${db_opts.subject}\'\n+ #end if\n+#end if\n+\n+## General search options\n+-word_size $general_search.word_size\n+-gapopen $general_search.gapopen\n+-gapextend $general_search.gapextend\n+-penalty $general_search.penalty\n+-max_intron_length $general_search.max_intron_length\n+\n+## Query filtering options\n+$query_filtering.lcase_masking\n+-validate_seqs $query_filtering.validate_seqs\n+-limit_lookup $query_filtering.limit_lookup\n+-max_db_word_count $query_filtering.max_db_word_count\n+-lookup_stride $query_filtering.lookup_stride\n+\n+## Restrict database search\n+#if $restrict_search.gilist:\n+ -gilist \'$restrict_search.gilist\'\n+#end if\n+#if $restrict_search.negative_gilist:\n+ -negative_gilist \'$restrict_search.negative_gilist\'\n+#end if\n+#if 
$restrict_search.seqidlist:\n+ -seqidlist \'$restrict_search.seqidlist\'\n+#end if\n+#if $restrict_search.negative_seqidlist:\n+ -negative_seqidlist \'$restrict_search.negative_seqidlist\'\n+#end if\n+#if str($restrict_search.taxids) != \'\':\n+ --taxids \'$restrict_search.taxids\'\n+#end if\n+#if $restrict_search.taxidlist:\n+ -taxidlist \'$restrict_search.taxidlist\'\n+#end if\n+#if str($restrict_search.negative_taxids) != \'\':\n+ --negative_taxids \'$restrict_search.negative_taxids\'\n+#end if\n+#if $restrict_search.negative_taxidlist:\n+ -negative_taxidlist \'$restrict_search.negative_taxidlist\'\n+#end if\n+\n+## Mapping options\n+-score $mapping.score\n+#if $mapping.max_edit_dist > 0:\n+ -max_edit_dist $mapping.max_edit_dist\n+#end if\n+-splice \'$mapping.splice\'\n+-reftype \'$mapping.reftype\'\n+\n+## Output unaligned options\n+#if str($output_options.report_unaligned_cond.report_unaligned) == \'yes\':\n+ #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.report_unaligned_separately) == \'yes\':\n+ -out_unaligned \'out_unaligned\'\n+ #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt) == \'bam\':\n+ -unaligned_fmt \'sam\'\n+ #else:\n+ -unaligned_fmt \'$output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt\'\n+ #end if\n+ #end if\n+#else:\n+ -no_unaligned\n+#end if\n+\n+## Additional output options\n+$output_options.no_discordant\n+## Switch default SAM output to be BAM.\n+#if str($output_options.outfmt_cond.outfmt) == \'bam\':\n+ $output_options.outfmt_cond.md_tag\n+ #if $query_mate:\n+ $output_options.outfmt_cond.no_query_id_trim\n+ #end if\n+ -out \'output.sam\'\n+ #if str($output_options.outfmt_cond.output_sort) == \'coordinate\':\n+ && samtools sort -@\\${GALAXY_SLOTS:-4} -O bam \'output.sam\' > \'$output\'\n+ #elif str($output_options.outfmt_cond.output_sort) == \'name\':\n+ && samtools sort -n 
-@\\${GALAXY_SLOTS:-4} -O'..b' in error -->\n+ <test expect_failure="true">\n+ <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/>\n+ <param name="db_opts_selector" value="file"/>\n+ <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/>\n+ <param name="report_unaligned_separately" value="yes"/>\n+ <param name="gilist" value="gilist1.tabular" ftype="tabular"/>\n+ <assert_stderr>\n+ <has_text text="Incompatible with argument:"/>\n+ </assert_stderr>\n+ </test>\n+ <!-- Single fasta.gz input, cached db, taxidlist, results in error -->\n+ <test expect_failure="true">\n+ <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/>\n+ <param name="db_opts_selector" value="db"/>\n+ <param name="database" value="phiX174"/>\n+ <param name="taxidlist" value="taxids.tabular" ftype="tabular"/>\n+ <assert_stderr>\n+ <has_text text="Taxonomy filtering is not supported in v4 BLAST dbs"/>\n+ </assert_stderr>\n+ </test>\n+ <!-- Paired fastqsanger.gz input, subject file -->\n+ <test expect_num_outputs="1">\n+ <param name="query" value="query_forward1.fastqsanger.gz" ftype="fastqsanger.gz"/>\n+ <param name="query_mate" value="query_reverse1.fastqsanger.gz" ftype="fastqsanger.gz"/>\n+ <param name="db_opts_selector" value="file"/>\n+ <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/>\n+ <output name="output" ftype="bam">\n+ <assert_contents>\n+ <has_size value="62080" delta="50"/>\n+ </assert_contents>\n+ </output>\n+ </test>\n+ <!-- Paired fastqsanger.gz input, cached blast db -->\n+ <test expect_num_outputs="1">\n+ <param name="query" value="query_forward1.fastqsanger.gz" ftype="fastqsanger.gz"/>\n+ <param name="query_mate" value="query_reverse1.fastqsanger.gz" ftype="fastqsanger.gz"/>\n+ <param name="db_opts_selector" value="db"/>\n+ <param name="database" value="phiX174"/>\n+ <output name="output" ftype="bam">\n+ <assert_contents>\n+ <has_size value="62079" delta="50"/>\n+ </assert_contents>\n+ </output>\n+ </test>\n+ </tests>\n+ 
<help><![CDATA[\n+**What it does**\n+\n+.. class:: warningmark\n+\n+In addition to a BLAST database, you can also search against a fasta file of subject (target) sequences. However, this is not\n+advised because it is slower (only one CPU is used), but more importantly gives e-values for pairwise searches (very small\n+e-values which will look overly significant). In most cases you should convert the fasta file into a blast database using\n+*makeblastdb* and search against that.\n+\n+Magic-BLAST is a tool for mapping large next-generation RNA or DNA sequencing runs against a whole genome or transcriptome.\n+Each alignment optimizes a composite score, taking into account simultaneously the two reads of a pair, and in case of RNA-seq,\n+locating the candidate introns and adding up the score of all exons. This is very different from other versions of BLAST, where\n+each exon is scored as a separate hit and read-pairing is ignored.\n+\n+Magic-BLAST incorporates within the NCBI BLAST code framework ideas developed in the NCBI Magic pipeline, in particular hit\n+extensions by local walk and jump, and recursive clipping of mismatches near the edges of the reads, which avoids accumulating\n+artefactual mismatches near splice sites and is needed to distinguish short indels from substitutions near the edges.\n+\n+The tool accepts a single or paired set of reads in fasta or fastqsanger format and produces bam or tabular output.\n+\n+More information about Magic-BLAST is available in the\n+`online documentation <https://ncbi.github.io/magicblast/>`_.\n+ ]]></help>\n+ <expand macro="citations"/>\n+</tool>\n' |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/blastdb.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blastdb.loc Tue Apr 05 12:11:08 2022 +0000 |
b |
@@ -0,0 +1,4 @@ +# This file is just a placeholder since Galaxy does +# not yet support uploading a BLAST database, which +# is required for functional tests. +phiX174 phiX174 ${__HERE__}/phiX174 |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/gilist1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gilist1.tabular Tue Apr 05 12:11:08 2022 +0000 |
b |
@@ -0,0 +1,1 @@ +57163782 |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nhd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/phiX174/blastdb.nhd Tue Apr 05 12:11:08 2022 +0000 |
b |
@@ -0,0 +1,7 @@ +15495040301 +25770063902 +2660043075 +27122163083 +3427376816 +40885395164 +7819003870 |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nhi |
b |
Binary file test-data/phiX174/blastdb.nhi has changed |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nhr |
b |
Binary file test-data/phiX174/blastdb.nhr has changed |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nin |
b |
Binary file test-data/phiX174/blastdb.nin has changed |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nog |
b |
Binary file test-data/phiX174/blastdb.nog has changed |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nsd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/phiX174/blastdb.nsd Tue Apr 05 12:11:08 2022 +0000 |
b |
@@ -0,0 +1,7 @@ +gnl|bl_ord_id|00 +gnl|bl_ord_id|11 +gnl|bl_ord_id|22 +gnl|bl_ord_id|33 +gnl|bl_ord_id|44 +gnl|bl_ord_id|55 +gnl|bl_ord_id|66 |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nsi |
b |
Binary file test-data/phiX174/blastdb.nsi has changed |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/phiX174/blastdb.nsq |
b |
Binary file test-data/phiX174/blastdb.nsq has changed |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/query1.fasta.gz |
b |
Binary file test-data/query1.fasta.gz has changed |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/query_forward1.fastqsanger.gz |
b |
Binary file test-data/query_forward1.fastqsanger.gz has changed |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/query_reverse1.fastqsanger.gz |
b |
Binary file test-data/query_reverse1.fastqsanger.gz has changed |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/subject1.fasta.gz |
b |
Binary file test-data/subject1.fasta.gz has changed |
b |
diff -r 000000000000 -r e6799e98c5fb test-data/taxids.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/taxids.tabular Tue Apr 05 12:11:08 2022 +0000 |
b |
@@ -0,0 +1,1 @@ +57163782 |
b |
diff -r 000000000000 -r e6799e98c5fb tool-data/blastdb.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/blastdb.loc.sample Tue Apr 05 12:11:08 2022 +0000 |
b |
@@ -0,0 +1,44 @@ +# This is a sample file distributed with Galaxy that is used to define a +# list of nucleotide BLAST databases, using three columns tab separated: +# +# <unique_id>{tab}<database_caption>{tab}<base_name_path> +# +# The captions typically contain spaces and might end with the build date. +# It is important that the actual database name does not have a space in +# it, and that there are only two tabs on each line. +# +# You can download the NCBI provided nucleotide databases like NT from here: +# ftp://ftp.ncbi.nlm.nih.gov/blast/db/ +# +# For simplicity, many Galaxy servers are configured to offer just a live +# version of each NCBI BLAST database (updated with the NCBI provided +# Perl scripts or similar). In this case, we recommend using the case +# sensitive base-name of the NCBI BLAST databases as the unique id. +# Consistent naming is important for sharing workflows between Galaxy +# servers. +# +# For example, consider the NCBI partially non-redundant nucleotide +# nt BLAST database, where you have downloaded and decompressed the +# files under /data/blastdb/ meaning at the command line BLAST+ +# would look at the files /data/blastdb/nt.n* when run with: +# +# $ blastn -db /data/blastdb/nt -query ... +# +# In this case use nt (lower case to match the NCBI file naming) as the +# unique id in the first column of blastdb.loc, giving an entry like +# this: +# +# nt{tab}NCBI partially non-redundant (nt){tab}/data/blastdb/nt +# +# Alternatively, rather than a "live" mirror of the NCBI databases which +# are updated automatically, for full reproducibility the Galaxy Team +# recommend saving date-stamped copies of the databases. In this case +# your blastdb.loc file should include an entry per line for each +# version you have stored. 
For example: +# +# nt_05Jun2010{tab}NCBI nt (partially non-redundant) 05 Jun 2010{tab}/data/blastdb/05Jun2010/nt +# nt_15Aug2010{tab}NCBI nt (partially non-redundant) 15 Aug 2010{tab}/data/blastdb/15Aug2010/nt +# ...etc... +# +# See also blastdb_p.loc which is for any protein BLAST database, and +# blastdb_d.loc which is for any protein domains databases (like CDD). |
b |
diff -r 000000000000 -r e6799e98c5fb tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Apr 05 12:11:08 2022 +0000 |
b |
@@ -0,0 +1,6 @@ +<tables> + <table name="blastdb" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, path</columns> + <file path="tool-data/blastdb.loc"/> + </table> +</tables> |
b |
diff -r 000000000000 -r e6799e98c5fb tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Tue Apr 05 12:11:08 2022 +0000 |
b |
@@ -0,0 +1,6 @@ +<tables> + <table name="blastdb" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, path</columns> + <file path="${__HERE__}/test-data/blastdb.loc"/> + </table> +</tables> |