Mercurial > repos > devteam > ncbi_blast_plus
diff tools/ncbi_blast_plus/ncbi_macros.xml @ 11:4c4a0da938ff draft
Uploaded v0.0.22, now wraps BLAST+ 2.2.28 allowing extended tabular output to include the hit descriptions as column 25.
Supports $GALAXY_SLOTS.
Includes more tests and heavy use of macros.
author | peterjc |
---|---|
date | Thu, 05 Dec 2013 06:55:59 -0500 |
parents | |
children | 623f727cdff1 |
line wrap: on
line diff
<!--
    Shared macros and tokens for the NCBI BLAST+ Galaxy tool wrappers
    (tools/ncbi_blast_plus/ncbi_macros.xml).

    <xml name="..."> elements are reusable fragments of tool XML.
    <token name="@...@"> elements are text substitutions; their content is
    whitespace sensitive (Cheetah directives and reStructuredText), so token
    bodies are deliberately written flush left and carry no XML comments.
-->
<macros>
    <!-- Maps each non-tabular out_format value to the datatype of the output. -->
    <xml name="output_change_format">
        <change_format>
            <when input="out_format" value="0" format="txt"/>
            <when input="out_format" value="0 -html" format="html"/>
            <when input="out_format" value="2" format="txt"/>
            <when input="out_format" value="2 -html" format="html"/>
            <when input="out_format" value="4" format="txt"/>
            <when input="out_format" value="4 -html" format="html"/>
            <when input="out_format" value="5" format="blastxml"/>
        </change_format>
    </xml>

    <!--
        Output format selector. The value "ext" is not a BLAST+ format code;
        it is expanded to an explicit column list by the @BLAST_OUTPUT@ token.
    -->
    <xml name="input_out_format">
        <param name="out_format" type="select" label="Output format">
            <option value="6">Tabular (standard 12 columns)</option>
            <option value="ext" selected="true">Tabular (extended 25 columns)</option>
            <option value="5">BLAST XML</option>
            <option value="0">Pairwise text</option>
            <option value="0 -html">Pairwise HTML</option>
            <option value="2">Query-anchored text</option>
            <option value="2 -html">Query-anchored HTML</option>
            <option value="4">Flat query-anchored text</option>
            <option value="4 -html">Flat query-anchored HTML</option>
            <!--
            <option value="-outfmt 11">BLAST archive format (ASN.1)</option>
            -->
        </param>
    </xml>

    <!-- Protein scoring matrix selector; BLOSUM62 is preselected. -->
    <xml name="input_scoring_matrix">
        <param name="matrix" type="select" label="Scoring matrix">
            <option value="BLOSUM90">BLOSUM90</option>
            <option value="BLOSUM80">BLOSUM80</option>
            <option value="BLOSUM62" selected="true">BLOSUM62 (default)</option>
            <option value="BLOSUM50">BLOSUM50</option>
            <option value="BLOSUM45">BLOSUM45</option>
            <option value="PAM250">PAM250</option>
            <option value="PAM70">PAM70</option>
            <option value="PAM30">PAM30</option>
        </param>
    </xml>

    <!-- Error detection shared by all tools: exit status first, then stderr patterns. -->
    <xml name="stdio">
        <stdio>
            <!-- Anything other than zero is an error -->
            <exit_code range="1:" />
            <exit_code range=":-1" />
            <!-- In case the return code has not been set properly check stderr too -->
            <regex match="Error:" />
            <regex match="Exception:" />
        </stdio>
    </xml>

    <!-- Genetic code used to translate the query; code 1 (Standard) is preselected. -->
    <xml name="input_query_gencode">
        <param name="query_gencode" type="select" label="Query genetic code">
            <!-- See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details -->
            <option value="1" selected="true">1. Standard</option>
            <option value="2">2. Vertebrate Mitochondrial</option>
            <option value="3">3. Yeast Mitochondrial</option>
            <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
            <option value="5">5. Invertebrate Mitochondrial</option>
            <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
            <option value="9">9. Echinoderm Mitochondrial</option>
            <option value="10">10. Euplotid Nuclear</option>
            <option value="11">11. Bacteria and Archaea</option>
            <option value="12">12. Alternative Yeast Nuclear</option>
            <option value="13">13. Ascidian Mitochondrial</option>
            <option value="14">14. Flatworm Mitochondrial</option>
            <option value="15">15. Blepharisma Macronuclear</option>
            <option value="16">16. Chlorophycean Mitochondrial Code</option>
            <option value="21">21. Trematode Mitochondrial Code</option>
            <option value="22">22. Scenedesmus obliquus mitochondrial Code</option>
            <option value="23">23. Thraustochytrium Mitochondrial Code</option>
            <option value="24">24. Pterobranchia mitochondrial code</option>
        </param>
    </xml>

    <!-- Genetic code used to translate the database/subject; same option list as the query. -->
    <xml name="input_db_gencode">
        <param name="db_gencode" type="select" label="Database/subject genetic code">
            <!-- See http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi for details -->
            <option value="1" selected="true">1. Standard</option>
            <option value="2">2. Vertebrate Mitochondrial</option>
            <option value="3">3. Yeast Mitochondrial</option>
            <option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
            <option value="5">5. Invertebrate Mitochondrial</option>
            <option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
            <option value="9">9. Echinoderm Mitochondrial</option>
            <option value="10">10. Euplotid Nuclear</option>
            <option value="11">11. Bacteria and Archaea</option>
            <option value="12">12. Alternative Yeast Nuclear</option>
            <option value="13">13. Ascidian Mitochondrial</option>
            <option value="14">14. Flatworm Mitochondrial</option>
            <option value="15">15. Blepharisma Macronuclear</option>
            <option value="16">16. Chlorophycean Mitochondrial Code</option>
            <option value="21">21. Trematode Mitochondrial Code</option>
            <option value="22">22. Scenedesmus obliquus mitochondrial Code</option>
            <option value="23">23. Thraustochytrium Mitochondrial Code</option>
            <option value="24">24. Pterobranchia mitochondrial code</option>
        </param>
    </xml>

    <!--
        Nucleotide subject chooser. Every branch defines all three of
        database / histdb / subject (as hidden empty params where unused).
    -->
    <xml name="input_conditional_nucleotide_db">
        <conditional name="db_opts">
            <param name="db_opts_selector" type="select" label="Subject database/sequences">
                <option value="db" selected="true">Locally installed BLAST database</option>
                <option value="histdb">BLAST database from your history</option>
                <option value="file">FASTA file from your history (see warning note below)</option>
            </param>
            <when value="db">
                <param name="database" type="select" label="Nucleotide BLAST database">
                    <options from_file="blastdb.loc">
                        <column name="value" index="0"/>
                        <column name="name" index="1"/>
                        <column name="path" index="2"/>
                    </options>
                </param>
                <param name="histdb" type="hidden" value="" />
                <param name="subject" type="hidden" value="" />
            </when>
            <when value="histdb">
                <param name="database" type="hidden" value="" />
                <param name="histdb" type="data" format="blastdbn" label="Nucleotide BLAST database" />
                <param name="subject" type="hidden" value="" />
            </when>
            <when value="file">
                <param name="database" type="hidden" value="" />
                <param name="histdb" type="hidden" value="" />
                <param name="subject" type="data" format="fasta" label="Nucleotide FASTA file to use as database"/>
            </when>
        </conditional>
    </xml>

    <!-- Protein counterpart of input_conditional_nucleotide_db (blastdb_p.loc / blastdbp). -->
    <xml name="input_conditional_protein_db">
        <conditional name="db_opts">
            <param name="db_opts_selector" type="select" label="Subject database/sequences">
                <option value="db" selected="true">Locally installed BLAST database</option>
                <option value="histdb">BLAST database from your history</option>
                <option value="file">FASTA file from your history (see warning note below)</option>
            </param>
            <when value="db">
                <param name="database" type="select" label="Protein BLAST database">
                    <options from_file="blastdb_p.loc">
                        <column name="value" index="0"/>
                        <column name="name" index="1"/>
                        <column name="path" index="2"/>
                    </options>
                </param>
                <param name="histdb" type="hidden" value="" />
                <param name="subject" type="hidden" value="" />
            </when>
            <when value="histdb">
                <param name="database" type="hidden" value="" />
                <param name="histdb" type="data" format="blastdbp" label="Protein BLAST database" />
                <param name="subject" type="hidden" value="" />
            </when>
            <when value="file">
                <param name="database" type="hidden" value="" />
                <param name="histdb" type="hidden" value="" />
                <param name="subject" type="data" format="fasta" label="Protein FASTA file to use as database"/>
            </when>
        </conditional>
    </xml>

    <!--
        Protein domain (PSSM) database chooser. Only locally installed
        databases are offered; the history branch is commented out pending
        a new datatype.
    -->
    <xml name="input_conditional_pssm">
        <conditional name="db_opts">
            <param name="db_opts_selector" type="select" label="Protein domain database (PSSM)">
                <option value="db" selected="true">Locally installed BLAST database</option>
                <!-- TODO - define new datatype
                <option value="histdb">BLAST protein domain database from your history</option>
                -->
            </param>
            <when value="db">
                <param name="database" type="select" label="Protein domain database">
                    <options from_file="blastdb_d.loc">
                        <column name="value" index="0"/>
                        <column name="name" index="1"/>
                        <column name="path" index="2"/>
                    </options>
                </param>
                <param name="histdb" type="hidden" value="" />
                <param name="subject" type="hidden" value="" />
            </when>
            <!-- TODO - define new datatype
            <when value="histdb">
                <param name="database" type="hidden" value="" />
                <param name="histdb" type="data" format="blastdbd" label="Protein domain database" />
                <param name="subject" type="hidden" value="" />
            </when>
            -->
        </conditional>
    </xml>

    <!--
        Chooser keyed on database type (nucl / prot) rather than on source.
        Note the selector is named db_type here, not db_opts_selector, and no
        histdb / subject params are defined.
    -->
    <xml name="input_conditional_choose_db_type">
        <conditional name="db_opts">
            <param name="db_type" type="select" label="Type of BLAST database">
                <option value="nucl" selected="true">Nucleotide</option>
                <option value="prot">Protein</option>
            </param>
            <when value="nucl">
                <param name="database" type="select" label="Nucleotide BLAST database">
                    <options from_file="blastdb.loc">
                        <column name="value" index="0"/>
                        <column name="name" index="1"/>
                        <column name="path" index="2"/>
                    </options>
                </param>
            </when>
            <when value="prot">
                <param name="database" type="select" label="Protein BLAST database">
                    <options from_file="blastdb_p.loc">
                        <column name="value" index="0"/>
                        <column name="name" index="1"/>
                        <column name="path" index="2"/>
                    </options>
                </param>
            </when>
        </conditional>
    </xml>

    <!-- Boolean that emits -parse_deflines when checked and nothing otherwise. -->
    <xml name="input_parse_deflines">
        <param name="parse_deflines" type="boolean" label="Should the query and subject defline(s) be parsed?" truevalue="-parse_deflines" falsevalue="" checked="false" help="This affects the formatting of the query/subject ID strings"/>
    </xml>

    <!-- SEG low complexity filter, off by default. Always emits an explicit -seg yes/no. -->
    <xml name="input_filter_query_default_false">
        <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="false" />
    </xml>

    <!-- SEG low complexity filter, on by default. Always emits an explicit -seg yes/no. -->
    <xml name="input_filter_query_default_true">
        <param name="filter_query" type="boolean" label="Filter out low complexity regions (with SEG)" truevalue="-seg yes" falsevalue="-seg no" checked="true" />
    </xml>

    <!-- Hit limit; zero means "do not pass a limit" (see @ADVANCED_OPTIONS@). -->
    <xml name="input_max_hits">
        <param name="max_hits" type="integer" value="0" label="Maximum hits to show" help="Use zero for default limits">
            <validator type="in_range" min="0" />
        </param>
    </xml>

    <!-- Expectation value cutoff, default 0.001. -->
    <xml name="input_evalue">
        <param name="evalue_cutoff" type="float" size="15" value="0.001" label="Set expectation value cutoff" />
    </xml>

    <!-- Word size; zero means "do not pass -word_size" (see @ADVANCED_OPTIONS@). -->
    <xml name="input_word_size">
        <param name="word_size" type="integer" value="0" label="Word size for wordfinder algorithm" help="Use zero for default, otherwise minimum 2.">
            <validator type="in_range" min="0" />
        </param>
    </xml>

    <!-- Query strand selector; each value is the complete command line fragment. -->
    <xml name="input_strand">
        <param name="strand" type="select" label="Query strand(s) to search against database/subject">
            <option value="-strand both">Both</option>
            <option value="-strand plus">Plus (forward)</option>
            <option value="-strand minus">Minus (reverse complement)</option>
        </param>
    </xml>

    <!-- Dependency declaration; relies on the including tool defining the @BINARY@ token. -->
    <xml name="requirements">
        <requirements>
            <requirement type="binary">@BINARY@</requirement>
            <requirement type="package" version="2.2.28">blast+</requirement>
        </requirements>
        <version_command>@BINARY@ -version</version_command>
    </xml>

    <!-- Show/hide wrapper; the including tool's advanced params replace <yield />. -->
    <xml name="advanced_options">
        <conditional name="adv_opts">
            <param name="adv_opts_selector" type="select" label="Advanced Options">
                <option value="basic" selected="true">Hide Advanced Options</option>
                <option value="advanced">Show Advanced Options</option>
            </param>
            <when value="basic" />
            <when value="advanced">
                <yield />
            </when>
        </conditional>
    </xml>

    <!-- Thread count from $GALAXY_SLOTS, falling back to 8 when it is unset. -->
    <token name="@THREADS@">-num_threads "\${GALAXY_SLOTS:-8}"</token>

    <!-- Emits -db (installed or history database) or -subject (FASTA), based on db_opts_selector. -->
    <token name="@BLAST_DB_SUBJECT@">
#if $db_opts.db_opts_selector == "db":
  -db "${db_opts.database.fields.path}"
#elif $db_opts.db_opts_selector == "histdb":
  -db "${os.path.join($db_opts.histdb.extra_files_path,'blastdb')}"
#else:
  -subject "$db_opts.subject"
#end if
    </token>

    <!-- Emits -out and -outfmt; "ext" is expanded to the fixed 25 column list. -->
    <token name="@BLAST_OUTPUT@">-out "$output1"
##Set the extended list here so when we add things, saved workflows are not affected
#if str($out_format)=="ext":
    -outfmt "6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles"
#else:
    -outfmt $out_format
#end if
    </token>

    <!-- Shared advanced options; max_hits and word_size are only passed when greater than zero. -->
    <token name="@ADVANCED_OPTIONS@">$adv_opts.filter_query
## Need int(str(...)) because $adv_opts.max_hits is an InputValueWrapper object not a string
## Note -max_target_seqs overrides -num_descriptions and -num_alignments
#if (str($adv_opts.max_hits) and int(str($adv_opts.max_hits)) &gt; 0):
-max_target_seqs $adv_opts.max_hits
#end if
#if (str($adv_opts.word_size) and int(str($adv_opts.word_size)) &gt; 0):
-word_size $adv_opts.word_size
#end if
$adv_opts.parse_deflines
    </token>

    <!-- @ON_DB_SUBJECT@ is for use with @BLAST_DB_SUBJECT@ -->
    <token name="@ON_DB_SUBJECT@">#if str($db_opts.db_opts_selector)=='db'
${db_opts.database}
#elif str($db_opts.db_opts_selector)=='histdb'
${db_opts.histdb.name}
#else
${db_opts.subject.name}
#end if</token>

    <!-- Citation text shared by the tool help sections. -->
    <token name="@REFERENCES@">
Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
Galaxy tools and workflows for sequence analysis with applications
in molecular plant pathology. PeerJ 1:e167
http://dx.doi.org/10.7717/peerj.167

Christiam Camacho et al. (2009).
BLAST+: architecture and applications.
BMC Bioinformatics. 15;10:421.
http://dx.doi.org/10.1186/1471-2105-10-421

This wrapper is available to install into other Galaxy Instances via the Galaxy
Tool Shed at http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
    </token>

    <!--
        Help text describing the output formats. The tables are
        reStructuredText simple tables, so column alignment is significant.
    -->
    <token name="@OUTPUT_FORMAT@">**Output format**

Because Galaxy focuses on processing tabular data, the default output of this
tool is tabular. The standard BLAST+ tabular output contains 12 columns:

====== ========= ============================================
Column NCBI name Description
------ --------- --------------------------------------------
     1 qseqid    Query Seq-id (ID of your sequence)
     2 sseqid    Subject Seq-id (ID of the database hit)
     3 pident    Percentage of identical matches
     4 length    Alignment length
     5 mismatch  Number of mismatches
     6 gapopen   Number of gap openings
     7 qstart    Start of alignment in query
     8 qend      End of alignment in query
     9 sstart    Start of alignment in subject (database hit)
    10 send      End of alignment in subject (database hit)
    11 evalue    Expectation value (E-value)
    12 bitscore  Bit score
====== ========= ============================================

The BLAST+ tools can optionally output additional columns of information,
but this takes longer to calculate. Most (but not all) of these columns are
included by selecting the extended tabular output. The extra columns are
included *after* the standard 12 columns. This is so that you can write
workflow filtering steps that accept either the 12 or 25 column tabular
BLAST output. Galaxy now uses this extended 25 column output by default.

====== ============= ===========================================
Column NCBI name     Description
------ ------------- -------------------------------------------
    13 sallseqid     All subject Seq-id(s), separated by ';'
    14 score         Raw score
    15 nident        Number of identical matches
    16 positive      Number of positive-scoring matches
    17 gaps          Total number of gaps
    18 ppos          Percentage of positive-scoring matches
    19 qframe        Query frame
    20 sframe        Subject frame
    21 qseq          Aligned part of query sequence
    22 sseq          Aligned part of subject sequence
    23 qlen          Query sequence length
    24 slen          Subject sequence length
    25 salltitles    All subject title(s), separated by '&lt;&gt;'
====== ============= ===========================================

The third option is BLAST XML output, which is designed to be parsed by
another program, and is understood by some Galaxy tools.

You can also choose several plain text or HTML output formats which are designed to be read by a person (not by another program).
The HTML versions use basic webpage formatting and can include links to the hits on the NCBI website.
The pairwise output (the default on the NCBI BLAST website) shows each match as a pairwise alignment with the query.
The two query anchored outputs show a multiple sequence alignment between the query and all the matches,
and differ in how insertions are shown (marked as insertions or with gap characters added to the other sequences).
    </token>

    <!-- Help text warning shown by tools that accept a FASTA file as the subject. -->
    <token name="@FASTA_WARNING@">.. class:: warningmark

You can also search against a FASTA file of subject (target)
sequences. This is *not* advised because it is slower (only one
CPU is used), but more importantly gives e-values for pairwise
searches (very small e-values which will look overly significant).
In most cases you should instead turn the other FASTA file into a
database first using *makeblastdb* and search against that.
    </token>

    <!-- Help text warning about long running searches; ends with an RST transition line. -->
    <token name="@SEARCH_TIME_WARNING@">.. class:: warningmark

**Note**. Database searches may take a substantial amount of time.
For large input datasets it is advisable to allow overnight processing.

-----
    </token>
</macros>