Mercurial > repos > iuc > megan_blast2lca
diff blast2lca.xml @ 0:ad69d2a05c3c draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/megan commit 2a49a6cdc1b4d37ab30eb85b8c658ccf9f5a0644"
author | iuc |
---|---|
date | Wed, 24 Nov 2021 21:52:36 +0000 |
parents | |
children | 1930eb870dca |
line wrap: on
line diff
<!--
    blast2lca.xml: Galaxy wrapper for the MEGAN "blast2lca" program.
    New file in changeset 0:ad69d2a05c3c (Wed Nov 24 21:52:36 2021 +0000).

    The tool links the selected alignment file under a name whose extension
    reflects its format, runs blast2lca on it and collects the taxonomy
    report plus, when KEGG mapping is enabled, a KEGG report.
    The tokens @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ and every
    expanded macro are defined in macros.xml.
-->
<tool id="megan_blast2lca" name="MEGAN Blast2LCA: apply LCA alignment" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>to produce a taxonomic classification</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
## Translate the Galaxy datatype of the input into the value passed to --format.
## The most specific datatypes are tested first: blastxml, sam and tabular are
## all text-derived datatypes in Galaxy, so testing 'txt' before them would
## report every one of them as BlastText. Plain text is therefore the fallback,
## which also guarantees that blast_format is always defined.
#if $blast_input.is_of_type('daa'):
    #set blast_format = 'DAA'
#else if $blast_input.is_of_type('blastxml'):
    #set blast_format = 'BlastXML'
#else if $blast_input.is_of_type('sam'):
    #set blast_format = 'SAM'
#else if $blast_input.is_of_type('tabular'):
    #set blast_format = 'BlastTab'
#else:
    #set blast_format = 'BlastText'
#end if

## Build the file extension for the symlink, keeping a trailing .gz for
## compressed inputs.
#set blast_ext = '.' + $blast_format
#if $blast_input.ext.endswith('.gz'):
    #set blast_ext = $blast_ext + '.gz'
#end if

#set blast_input_identifier = 'blast_input' + $blast_ext
ln -s '${blast_input}' '${blast_input_identifier}' &&

blast2lca
    --input '${blast_input_identifier}'
    --format '${blast_format}'
    --mode '${mode}'
    $advanced_options.showRanks
    $advanced_options.officialRanksOnly
    $advanced_options.showTaxIds
    --minScore $advanced_options.minScore
    --maxExpected $advanced_options.maxExpected
    --topPercent $advanced_options.topPercent
    --minPercentIdentity $advanced_options.minPercentIdentity
    --maxKeggPerRead $advanced_options.maxKeggPerRead
    $advanced_options.applyTopPercentKegg
    $advanced_options.parseTaxonNames
    ## Optional mapping files are only passed when a dataset was selected.
    #if $advanced_options.mapDB:
        --mapDB '$advanced_options.mapDB'
    #end if
    #if $advanced_options.acc2taxa:
        --acc2taxa '$advanced_options.acc2taxa'
    #end if
    #if $advanced_options.syn2taxa:
        --syn2taxa '$advanced_options.syn2taxa'
    #end if
    #if $advanced_options.acc2kegg:
        --acc2kegg '$advanced_options.acc2kegg'
    #end if
    #if $advanced_options.syn2kegg:
        --syn2kegg '$advanced_options.syn2kegg'
    #end if
    $advanced_options.firstWordIsAccession
    #if str($advanced_options.accessionTags) != '':
        --accessionTags '$advanced_options.accessionTags'
    #end if
    ## The KEGG report is only requested (and only collected, see the output
    ## filter below) when KEGG mapping is enabled.
    #if $advanced_options.kegg:
        --kegg
        --keggOutput '$kegg_output'
    #end if
    --output '${taxonomy_output}'
]]></command>
    <inputs>
        <param name="blast_input" argument="--input" type="data" format="daa,blastxml,sam,tabular,txt" label="Blast file"/>
        <param argument="--mode" type="select" label="Blast mode">
            <expand macro="blast_mode_options"/>
        </param>
        <section name="advanced_options" title="Advanced options" expanded="false">
            <param argument="--kegg" type="boolean" truevalue="--kegg" falsevalue="" checked="false" label="Map reads to KEGG KOs?"/>
            <param argument="--showRanks" type="boolean" truevalue="--showRanks" falsevalue="" checked="true" label="Show taxonomic ranks?"/>
            <param argument="--officialRanksOnly" type="boolean" truevalue="--officialRanksOnly" falsevalue="" checked="true" label="Report only taxa that have an official rank?"/>
            <param argument="--showTaxIds" type="boolean" truevalue="--showTaxIds" falsevalue="" checked="false" label="Report taxon ids rather than taxon names?"/>
            <!-- Supplies minScore, maxExpected, topPercent and minPercentIdentity used in the command. -->
            <expand macro="common_blast_params"/>
            <param argument="--maxKeggPerRead" type="integer" value="4" label="Maximum number of KEGG assignments to report for a read"/>
            <param argument="--applyTopPercentKegg" type="boolean" truevalue="--applyTopPercentKegg" falsevalue="" checked="true" label="Apply top percent filter in KEGG KO analysis?"/>
            <param argument="--parseTaxonNames" type="boolean" truevalue="--parseTaxonNames" falsevalue="" checked="true" label="Parse taxon names?"/>
            <param argument="--mapDB" type="data" format="sqlite" optional="true" label="MEGAN mapping db"/>
            <param argument="--acc2taxa" type="data" format="sqlite" optional="true" label="Accession-to-Taxonomy mapping file"/>
            <param argument="--syn2taxa" type="data" format="sqlite" optional="true" label="Synonyms-to-Taxonomy mapping file"/>
            <param argument="--acc2kegg" type="data" format="sqlite" optional="true" label="Accession-to-KEGG mapping file"/>
            <param argument="--syn2kegg" type="data" format="sqlite" optional="true" label="Synonyms-to-KEGG mapping file"/>
            <param argument="--firstWordIsAccession" type="boolean" truevalue="--firstWordIsAccession" falsevalue="" checked="true" label="First word in reference header is accession number?" help="Set to true for NCBI-nr downloaded Sep 2016 or later"/>
            <param argument="--accessionTags" type="text" optional="true" label="List of accession tags" help="Specify a space-separated list of tags (e.g., 'gb|' 'ref|')">
                <expand macro="sanitize_query" validinitial="string.ascii_letters,string.punctuation"/>
            </param>
        </section>
    </inputs>
    <outputs>
        <data name="taxonomy_output" format="txt"/>
        <!-- Only created when "Map reads to KEGG KOs?" is enabled. -->
        <data name="kegg_output" format="txt" label="${tool.name} on ${on_string} (KEGG)">
            <filter>advanced_options['kegg']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Text input with KEGG mapping enabled, so both outputs are expected. -->
        <test expect_num_outputs="2">
            <param name="blast_input" value="blast_R1.txt" ftype="txt"/>
            <param name="mode" value="BlastN"/>
            <section name="advanced_options">
                <param name="kegg" value="true"/>
            </section>
            <output name="taxonomy_output" file="taxonomy_output.txt" ftype="txt"/>
            <output name="kegg_output" file="kegg_output.txt" ftype="txt"/>
        </test>
    </tests>
    <help>
**What it does**

Applies the LCA alignment to reads and can also perform KEGG classification. The input is a BLAST file or something similar.
This wrapper supports the following formats for the input Blast file. The SAM, Tabular and Text formats can be produced by
the Galaxy MALT Analyzer tool. When these formats are used, this tool will apply the SAM, BlastTab and BlastText format options
required by MEGAN.

 * **DIAMOND Alignment Archive (DAA)** - the binary alignment archive format written by the DIAMOND aligner
 * **BlastXML** - XML output from Blast
 * **Sequence Alignment/Map (SAM)** - a tab-delimited text format consisting of a header section, which is optional, and an alignment section
 * **Tabular** - information presented in the form of a table with rows and columns
 * **Text** - plain text format

The tool produces a text file for the LCA alignment.

If the option to Map reads to KEGG KOs is selected, an additional text file containing the KEGG classification is produced.
The KEGG database provides a collection of metabolic pathways and other pathways, but due to KEGG licensing restrictions, the
Community Edition of KEGG (used by this tool) ships with an early 2011 version of the KEGG classification, so KEGG pathways
cannot be viewed in the output.

The KEGG classification can be displayed as a tree. Genes are mapped onto so-called KO groups and these are present in one or
more pathways. The MEGAN program will attempt to map each read onto a gene that has a valid KO identifier and thus, to one or
more pathways.

To perform this analysis, MEGAN uses a mapping of GI numbers to KO groups. Hence, if a KEGG-based analysis is desired, then
the database that is used in the BLAST alignment must contain GI numbers.
    </help>
    <citations>
        <citation type="doi">10.1101/050559</citation>
    </citations>
</tool>