comparison blast2lca.xml @ 0:ad69d2a05c3c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/megan commit 2a49a6cdc1b4d37ab30eb85b8c658ccf9f5a0644"
author iuc
date Wed, 24 Nov 2021 21:52:36 +0000
parents
children 1930eb870dca
comparison
equal deleted inserted replaced
-1:000000000000 0:ad69d2a05c3c
1 <tool id="megan_blast2lca" name="MEGAN Blast2LCA: apply LCA alignment" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>to produce a taxonomic classification</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="bio_tools"/>
7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[
9 #import re
10 ## Translate the Galaxy datatype of the input into the value passed to blast2lca --format.
11 #if $blast_input.is_of_type('daa'):
12 #set blast_format = 'DAA'
13 #else if $blast_input.is_of_type('txt'):
14 #set blast_format = 'BlastText'
15 #else if $blast_input.is_of_type('blastxml'):
16 #set blast_format = 'BlastXML'
17 #else if $blast_input.is_of_type('tabular'):
18 #set blast_format = 'BlastTab'
19 #else if $blast_input.is_of_type('sam'):
20 #set blast_format = 'SAM'
21 #end if
22 #set blast_ext = '.' + $blast_format
23 #if $blast_input.ext.endswith('.gz'):
24 #set blast_ext = $blast_ext + '.gz'
25 #end if
26 ## Link the dataset under a name that ends in the format extension (plus '.gz' for gzipped datatypes).
27 #set blast_input_identifier = 'blast_input' + $blast_ext
28 ln -s '${blast_input}' '${blast_input_identifier}' &&
29 ## Mapping files, accession tags and the KEGG output are only added to the command line when set by the user.
30 blast2lca
31 --input '${blast_input_identifier}'
32 --format '${blast_format}'
33 --mode '${mode}'
34 $advanced_options.showRanks
35 $advanced_options.officialRanksOnly
36 $advanced_options.showTaxIds
37 --minScore $advanced_options.minScore
38 --maxExpected $advanced_options.maxExpected
39 --topPercent $advanced_options.topPercent
40 --minPercentIdentity $advanced_options.minPercentIdentity
41 --maxKeggPerRead $advanced_options.maxKeggPerRead
42 $advanced_options.applyTopPercentKegg
43 $advanced_options.parseTaxonNames
44 #if $advanced_options.mapDB:
45 --mapDB '$advanced_options.mapDB'
46 #end if
47 #if $advanced_options.acc2taxa:
48 --acc2taxa '$advanced_options.acc2taxa'
49 #end if
50 #if $advanced_options.syn2taxa:
51 --syn2taxa '$advanced_options.syn2taxa'
52 #end if
53 #if $advanced_options.acc2kegg:
54 --acc2kegg '$advanced_options.acc2kegg'
55 #end if
56 #if $advanced_options.syn2kegg:
57 --syn2kegg '$advanced_options.syn2kegg'
58 #end if
59 $advanced_options.firstWordIsAccession
60 #if str($advanced_options.accessionTags) != '':
61 --accessionTags '$advanced_options.accessionTags'
62 #end if
63 #if $advanced_options.kegg:
64 --kegg
65 --keggOutput '$kegg_output'
66 #end if
67 --output '${taxonomy_output}'
68 ]]></command>
69 <inputs>
70 <param name="blast_input" argument="--input" type="data" format="daa,blastxml,sam,tabular,txt" label="Blast file"/>
71 <param argument="--mode" type="select" label="Blast mode">
72 <expand macro="blast_mode_options"/>
73 </param>
74 <section name="advanced_options" title="Advanced options" expanded="false"> <!-- parameters in this section are read by the command template as $advanced_options.NAME -->
75 <param argument="--kegg" type="boolean" truevalue="--kegg" falsevalue="" checked="false" label="Map reads to KEGG KOs?"/>
76 <param argument="--showRanks" type="boolean" truevalue="--showRanks" falsevalue="" checked="true" label="Show taxonomic ranks?"/>
77 <param argument="--officialRanksOnly" type="boolean" truevalue="--officialRanksOnly" falsevalue="" checked="true" label="Report only taxa that have an official rank?"/>
78 <param argument="--showTaxIds" type="boolean" truevalue="--showTaxIds" falsevalue="" checked="false" label="Report taxon ids rather than taxon names?"/>
79 <expand macro="common_blast_params"/>
80 <param argument="--maxKeggPerRead" type="integer" value="4" label="Maximum number of KEGG assignments to report for a read"/>
81 <param argument="--applyTopPercentKegg" type="boolean" truevalue="--applyTopPercentKegg" falsevalue="" checked="true" label="Apply top percent filter in KEGG KO analysis?"/>
82 <param argument="--parseTaxonNames" type="boolean" truevalue="--parseTaxonNames" falsevalue="" checked="true" label="Parse taxon names?"/>
83 <param argument="--mapDB" type="data" format="sqlite" optional="true" label="MEGAN mapping db"/>
84 <param argument="--acc2taxa" type="data" format="sqlite" optional="true" label="Accession-to-Taxonomy mapping file"/>
85 <param argument="--syn2taxa" type="data" format="sqlite" optional="true" label="Synonyms-to-Taxonomy mapping file"/>
86 <param argument="--acc2kegg" type="data" format="sqlite" optional="true" label="Accession-to-KEGG mapping file"/>
87 <param argument="--syn2kegg" type="data" format="sqlite" optional="true" label="Synonyms-to-KEGG mapping file"/>
88 <param argument="--firstWordIsAccession" type="boolean" truevalue="--firstWordIsAccession" falsevalue="" checked="true" label="First word in reference header is accession number?" help="Set to true for NCBI-nr downloaded Sep 2016 or later"/>
89 <param argument="--accessionTags" type="text" optional="true" label="List of accession tags" help="Specify a space-separated list of tags (e.g., 'gb|' 'ref|')">
90 <expand macro="sanitize_query" validinitial="string.ascii_letters,string.punctuation"/>
91 </param>
92 </section>
93 </inputs>
94 <outputs> <!-- taxonomy_output is written via --output; kegg_output is written via --keggOutput and only exists when the kegg option is checked -->
95 <data name="taxonomy_output" format="txt"/>
96 <data name="kegg_output" format="txt" label="${tool.name} on ${on_string} (KEGG)">
97 <filter>advanced_options['kegg']</filter>
98 </data>
99 </outputs>
100 <tests> <!-- one test: text Blast input in BlastN mode with KEGG mapping enabled, so both the taxonomy and KEGG outputs are compared -->
101 <test expect_num_outputs="2">
102 <param name="blast_input" value="blast_R1.txt" ftype="txt"/>
103 <param name="mode" value="BlastN"/>
104 <section name="advanced_options">
105 <param name="kegg" value="true"/>
106 </section>
107 <output name="taxonomy_output" file="taxonomy_output.txt" ftype="txt"/>
108 <output name="kegg_output" file="kegg_output.txt" ftype="txt"/>
109 </test>
110 </tests>
111 <help>
112 **What it does**
113
114 Applies the LCA alignment to reads and can also perform KEGG classification. The input is a BLAST file or something similar.
115 This wrapper supports the following formats for the input Blast file. The SAM, Tabular and Text formats can be produced by
116 the Galaxy MALT Analyzer tool. When these formats are used, this tool will apply the SAM, BlastTab and BlastText format options
117 required by MEGAN, respectively.
118
119 * **DIAMOND Alignment Archive (DAA)** - the binary alignment archive format written by the DIAMOND sequence aligner
120 * **BlastXML** - XML output from Blast
121 * **Sequence Alignment/Map (SAM)** - a tab-delimited text format consisting of a header section, which is optional, and an alignment section
122 * **Tabular** - information presented in the form of a table with rows and columns
123 * **Text** - plain text format
124
125 The tool produces a text file for the LCA alignment.
126
127 If the option to Map reads to KEGG KOs is selected, an additional text file containing the KEGG classification is produced.
128 The KEGG database provides a collection of metabolic pathways and other pathways, but due to KEGG licensing restrictions, the
129 Community Edition of KEGG (used by this tool) ships with an early 2011 version of the KEGG classification, so KEGG pathways
130 cannot be viewed in the output.
131
132 The KEGG classification can be displayed as a tree. Genes are mapped onto so-called KO groups and these are present in one or
133 more pathways. The MEGAN program will attempt to map each read onto a gene that has a valid KO identifier and thus, to one or
134 more pathways.
135
136 To perform this analysis, MEGAN uses a mapping of GI numbers to KO groups. Hence, if a KEGG-based analysis is desired, then
137 the database that is used in the BLAST alignment must contain GI numbers.
138 </help>
139 <citations> <!-- a citation of type "doi" holds the bare DOI, not a resolver URL -->
140 <citation type="doi">10.1101/050559</citation>
141 </citations>
142 </tool>
143