diff blast2lca.xml @ 0:ad69d2a05c3c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/megan commit 2a49a6cdc1b4d37ab30eb85b8c658ccf9f5a0644"
author iuc
date Wed, 24 Nov 2021 21:52:36 +0000
parents
children 1930eb870dca
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast2lca.xml	Wed Nov 24 21:52:36 2021 +0000
@@ -0,0 +1,143 @@
+<tool id="megan_blast2lca" name="MEGAN Blast2LCA: apply LCA alignment" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>to produce a taxonomic classification</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="bio_tools"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+#import re
+
+#if $blast_input.is_of_type('daa'):
+    #set blast_format = 'DAA'
+#else if $blast_input.is_of_type('txt'):
+    #set blast_format = 'BlastText'
+#else if $blast_input.is_of_type('blastxml'):
+    #set blast_format = 'BlastXML'
+#else if $blast_input.is_of_type('tabular'):
+    #set blast_format = 'BlastTab'
+#else if $blast_input.is_of_type('sam'):
+    #set blast_format = 'SAM'
+#end if
+#set blast_ext = '.' + $blast_format
+#if $blast_input.ext.endswith('.gz'):
+    #set blast_ext = $blast_ext + '.gz'
+#end if
+
+#set blast_input_identifier = 'blast_input' + $blast_ext
+ln -s '${blast_input}' '${blast_input_identifier}' &&
+
+blast2lca 
+    --input '${blast_input_identifier}'
+    --format '${blast_format}'
+    --mode '${mode}'
+    $advanced_options.showRanks
+    $advanced_options.officialRanksOnly
+    $advanced_options.showTaxIds
+    --minScore $advanced_options.minScore
+    --maxExpected $advanced_options.maxExpected
+    --topPercent $advanced_options.topPercent
+    --minPercentIdentity $advanced_options.minPercentIdentity
+    --maxKeggPerRead $advanced_options.maxKeggPerRead
+    $advanced_options.applyTopPercentKegg
+    $advanced_options.parseTaxonNames
+    #if $advanced_options.mapDB:
+        --mapDB '$advanced_options.mapDB'
+    #end if
+    #if $advanced_options.acc2taxa:
+        --acc2taxa '$advanced_options.acc2taxa'
+    #end if
+    #if $advanced_options.syn2taxa:
+        --syn2taxa '$advanced_options.syn2taxa'
+    #end if
+    #if $advanced_options.acc2kegg:
+        --acc2kegg '$advanced_options.acc2kegg'
+    #end if
+    #if $advanced_options.syn2kegg:
+        --syn2kegg '$advanced_options.syn2kegg'
+    #end if
+    $advanced_options.firstWordIsAccession
+    #if str($advanced_options.accessionTags) != '':
+        --accessionTags '$advanced_options.maccessionTags'
+    #end if
+    #if $advanced_options.kegg:
+        --kegg
+        --keggOutput '$kegg_output'
+    #end if
+    --output '${taxonomy_output}'
+]]></command>
+    <inputs>
+        <param name="blast_input" argument="--input" type="data" format="daa,blastxml,sam,tabular,txt" label="Blast file"/>
+        <param argument="--mode" type="select" label="Blast mode">
+            <expand macro="blast_mode_options"/>
+        </param>
+        <section name="advanced_options" title="Advanced options" expanded="false">
+            <param argument="--kegg" type="boolean" truevalue="--kegg" falsevalue="" checked="false" label="Map reads to KEGG KOs?"/>
+            <param argument="--showRanks" type="boolean" truevalue="--showRanks" falsevalue="" checked="true" label="Show taxonomic ranks?"/>
+            <param argument="--officialRanksOnly" type="boolean" truevalue="--officialRanksOnly" falsevalue="" checked="true" label="Report only taxa that have an official rank?"/>
+            <param  argument="--showTaxIds" type="boolean" truevalue="--showTaxIds" falsevalue="" checked="false" label="Report taxon ids rather than taxon names?"/>
+            <expand macro="common_blast_params"/>
+            <param argument="--maxKeggPerRead" type="integer" value="4" label="Maximum number of KEGG assignments to report for a read"/>
+            <param argument="--applyTopPercentKegg" type="boolean" truevalue="--applyTopPercentKegg" falsevalue="" checked="true" label="Apply top percent filter in KEGG KO analysis?"/>
+            <param argument="--parseTaxonNames" type="boolean" truevalue="--parseTaxonNames" falsevalue="" checked="true" label="Apply top percent filter in KEGG KO analysis?"/>
+            <param argument="--mapDB" type="data" format="sqlite" optional="true" label="MEGAN mapping db"/>
+            <param argument="--acc2taxa" type="data" format="sqlite" optional="true" label="Accession-to-Taxonomy mapping file"/>
+            <param argument="--syn2taxa" type="data" format="sqlite" optional="true" label="Synonyms-to-Taxonomy mapping file"/>
+            <param argument="--acc2kegg" type="data" format="sqlite" optional="true" label="Accession-to-KEGG mapping file"/>
+            <param argument="--syn2kegg" type="data" format="sqlite" optional="true" label="Synonyms-to-KEGG mapping file"/>
+            <param argument="--firstWordIsAccession" type="boolean" truevalue="--firstWordIsAccession" falsevalue="" checked="true" label="First word in reference header is accession number?" help="Set to true for NCBI-nr downloaded Sep 2016 or later"/>
+            <param argument="--accessionTags" type="text" optional="true" label="List of accession tags" help="Specify a space-separated list of tags (e.g., 'gb|' 'ref|')">
+                <expand macro="sanitize_query" validinitial="string.ascii_letters,string.punctuation"/>
+            </param>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="taxonomy_output" format="txt"/>
+        <data name="kegg_output" format="txt" label="${tool.name} on ${on_string} (KEGG)">
+            <filter>advanced_options['kegg']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="2">
+            <param name="blast_input" value="blast_R1.txt" ftype="txt"/>
+            <param name="mode" value="BlastN"/>
+            <section name="advanced_options">
+                <param name="kegg" value="true"/>
+            </section>
+            <output name="taxonomy_output" file="taxonomy_output.txt" ftype="txt"/>
+            <output name="kegg_output" file="kegg_output.txt" ftype="txt"/>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Applies the LCA alignment to reads and can also perform KEGG classification.  The input is a BLAST file or something similar.
+This wrapper supports the following formats for the input Blast file.  The SAM, Tabular and Text formats can be produced by
+The Galaxy MALT Analyzer tool.  When these formats are used, this tool will apply the SAM, BlastText and BlastTab format options
+required by MEGAN.
+
+ * **Direct Access Archive (DAA)** - a proprietary file format developed by PowerISO Computing for disk image files
+ * **BlastXML** - XML output from Blast
+ * **Sequence Alignment/Map (SAM)** - a tab-delimited text format consisting of a header section, which is optional, and an alignment section
+ * **Tabular** - information presented in the form of a table with rows and columns
+ * **Text** - plain text format
+
+The tool produces a text file for the LCA alignment.
+
+If the option to Map reads to KEGG KOs is selected, an additional text file containing the KEGG classification is produced.
+The KEGG database provides a collection of metabolic pathways and other pathways, but due to KEGG licensing restrictions, the
+Community Edition of KEGG (used by this tool) ships with an early 2011 version of the KEGG classification, so KEGG pathways
+cannot be viewed in the putput.
+
+The KEGG classification can be displayed as a tree.  Genes are mapped onto so-called KO groups and these are present in one or
+more pathways.  The MEGAN program will attempt to map each read onto a gene that has a valid KO identifier and thus, to one or
+more pathways.
+
+To perform this analysis, MEGAN uses a mapping of GI numbers to KO groups. Hence, if a KEGG-based analysis is desired, then
+the database that is used in the BLAST alignment must contain GI numbers.
+    </help>
+    <citations>
+        <citation type="doi">https://doi.org/10.1101/050559</citation>
+    </citations>
+</tool>
+