comparison tools/ncbi_blast_plus/ncbi_macros.xml @ 25:e25d3acf6e68 draft

v0.3.1 completed gzip support
author peterjc
date Tue, 23 Oct 2018 08:48:19 -0400
parents 31e517610e1f
children 2889433c7ae1
comparison
equal deleted inserted replaced
24:c877294f8025 25:e25d3acf6e68
1 <macros> 1 <macros>
2 <token name="@WRAPPER_VERSION@">0.3.0</token> 2 <token name="@WRAPPER_VERSION@">0.3.1</token>
3 <xml name="parallelism"> 3 <xml name="parallelism">
4 <!-- If job splitting is enabled, break up the query file into parts --> 4 <!-- If job splitting is enabled, break up the query file into parts -->
5 <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" /> 5 <parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="1000" merge_outputs="output1" />
6 </xml> 6 </xml>
7 7
8 <xml name="preamble"> 8 <xml name="preamble">
9 <requirements> 9 <requirements>
10 <requirement type="package" version="2.7.1">blast</requirement> 10 <requirement type="package" version="2.7.1">blast</requirement>
11 </requirements> 11 </requirements>
12 <version_command>@BINARY@ -version</version_command> 12 <version_command>@BINARY@ -version</version_command>
13 </xml>
14
15 <xml name="nucl_query">
16 <param argument="-query" type="data" format="fasta,fasta.gz" label="Nucleotide query sequence(s)"/>
17 </xml>
18
19 <xml name="prot_query">
20 <param argument="-query" type="data" format="fasta,fasta.gz" label="Protein query sequence(s)"/>
13 </xml> 21 </xml>
14 22
15 <xml name="output_change_format"> 23 <xml name="output_change_format">
16 <change_format> 24 <change_format>
17 <when input="output.out_format" value="0" format="txt"/> 25 <when input="output.out_format" value="0" format="txt"/>
441 <xml name="input_filter_query_default_true"> 449 <xml name="input_filter_query_default_true">
442 <param name="filter_query" argument="-seg" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" /> 450 <param name="filter_query" argument="-seg" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
443 </xml> 451 </xml>
444 452
445 <xml name="input_max_hits"> 453 <xml name="input_max_hits">
446 <param name="max_hits" type="integer" min="0" value="0" label="Maximum hits to show" help="Use zero for default limits" /> 454 <param name="max_hits" type="integer" min="0" value="0" label="Maximum hits to consider/show" help="Use zero for default limits. For HTML and plain text output this value is passed to -num_descriptions and -num_alignments, but for XML, tabular, etc., it is used with -max_target_seqs instead. In either case, in addition to limiting the final output, this alters internal limits during the search, which can in some cases exclude matches which would otherwise become the best hit." />
447 <param argument="-max_hsps" type="integer" min="1" optional="true" value="" label="Maximum number of HSPs (alignments) to keep for any single query-subject pair" help="The HSPs shown will be the best as judged by expect value. If this option is not set, BLAST shows all HSPs meeting the expect value criteria" /> 455 <param argument="-max_hsps" type="integer" min="1" optional="true" value="" label="Maximum number of HSPs (alignments) to keep for any single query-subject pair" help="The HSPs shown will be the best as judged by expect value. If this option is not set, BLAST shows all HSPs meeting the expect value criteria" />
448 </xml> 456 </xml>
449 457
450 <xml name="input_evalue"> 458 <xml name="input_evalue">
451 <param name="evalue_cutoff" argument="-evalue" type="float" size="15" value="0.001" label="Set expectation value cutoff" /> 459 <param name="evalue_cutoff" argument="-evalue" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
556 #end if 564 #end if
557 ]]></token> 565 ]]></token>
558 566
559 <token name="@THREADS@">-num_threads "\${GALAXY_SLOTS:-8}"</token> 567 <token name="@THREADS@">-num_threads "\${GALAXY_SLOTS:-8}"</token>
560 568
569 <token name="@QUERY@"><![CDATA[
570 #if $query.is_of_type('fasta.gz') and $query.ext != "fasta":
571 -query <(gunzip -c '${query}')
572 #else:
573 -query '${query}'
574 #end if
575 ]]></token>
576
561 <token name="@BLAST_DB_SUBJECT@"><![CDATA[ 577 <token name="@BLAST_DB_SUBJECT@"><![CDATA[
562 #if $db_opts.db_opts_selector == "db": 578 #if $db_opts.db_opts_selector == "db":
563 -db '${" ".join(str($db_opts.database.fields.path).split(","))}' 579 -db '${" ".join(str($db_opts.database.fields.path).split(","))}'
564 #elif $db_opts.db_opts_selector == "histdb": 580 #elif $db_opts.db_opts_selector == "histdb":
565 -db '${os.path.join($db_opts.histdb.extra_files_path, "blastdb")}' 581 -db '${os.path.join($db_opts.histdb.extra_files_path, "blastdb")}'
566 #else: 582 #else:
567 #if $db_opts.subject.is_of_type('fasta.gz'): 583 #if $db_opts.subject.is_of_type('fasta.gz') and $db_opts.subject.ext != "fasta":
568 -subject <(gunzip -c '${$db_opts.subject}') 584 -subject <(gunzip -c '${$db_opts.subject}')
569 #else: 585 #else:
570 -subject '${db_opts.subject}' 586 -subject '${db_opts.subject}'
571 #end if 587 #end if
572 #end if 588 #end if
587 #end if 603 #end if
588 ]]></token> 604 ]]></token>
589 <token name="@ADV_FILTER_QUERY@">$adv_opts.filter_query</token> 605 <token name="@ADV_FILTER_QUERY@">$adv_opts.filter_query</token>
590 <token name="@ADV_MAX_HITS@"><![CDATA[ 606 <token name="@ADV_MAX_HITS@"><![CDATA[
591 ## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string 607 ## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
592 ## Note -max_target_seqs used to simply override -num_descriptions and -num_alignments 608 ##
593 ## but this was changed in BLAST+ 2.2.27 onwards to force their use (raised with NCBI) 609 ## Quoting BLAST 2.7.1+ output from "blastp --help" or "blastn --help":
610 ##
611 ## *** Formatting options
612 ## -num_descriptions <Integer, >=0>
613 ## Number of database sequences to show one-line descriptions for
614 ## Not applicable for outfmt > 4
615 ## Default = `500'
616 ## * Incompatible with: max_target_seqs
617 ## -num_alignments <Integer, >=0>
618 ## Number of database sequences to show alignments for
619 ## Default = `250'
620 ## * Incompatible with: max_target_seqs
621 ##
622 ## *** Restrict search or results
623 ##
624 ## -max_target_seqs <Integer, >=1>
625 ## Maximum number of aligned sequences to keep
626 ## Not applicable for outfmt <= 4
627 ## Default = `500'
628 ## * Incompatible with: num_descriptions, num_alignments
629 ##
630 ## So, taken at face value, we still need to treat the Text and HTML output
631 ## differently from the Tabular and XML output. Note, however, that these limits are
632 ## applied at different stages (during the search, or afterwards when writing the output):
633 ## https://blastedbio.blogspot.com/2015/12/blast-max-target-sequences-bug.html
634 ##
635 ## See also our user-facing help text.
594 #if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0): 636 #if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) > 0):
595 #if str($output.out_format) in ["6", "ext", "cols", "5"]: 637 #if str($output.out_format) in ["6", "ext", "cols", "5"]:
596 ## Most output formats use this, including tabular and XML: 638 ## Most output formats use this, including tabular and XML:
597 -max_target_seqs '${adv_opts.max_hits}' 639 -max_target_seqs '${adv_opts.max_hits}'
598 #else 640 #else
599 ## Text and HTML output formats 0-4 currently need this instead: 641 ## Text and HTML output formats 0-4 currently need this instead:
600 -num_descriptions $adv_opts.max_hits -num_alignments $adv_opts.max_hits 642 -num_descriptions '${adv_opts.max_hits}' -num_alignments '${adv_opts.max_hits}'
601 #end if 643 #end if
602 #end if 644 #end if
603 #if str($adv_opts.max_hsps) 645 #if str($adv_opts.max_hsps)
604 -max_hsps '${adv_opts.max_hsps}' 646 -max_hsps '${adv_opts.max_hsps}'
605 #end if 647 #end if