diff blast2rma.xml @ 0:fa3c3a64c993 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/megan commit 2a49a6cdc1b4d37ab30eb85b8c658ccf9f5a0644"
author iuc
date Wed, 24 Nov 2021 21:52:14 +0000
parents
children 2f8d3924bb3b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast2rma.xml	Wed Nov 24 21:52:14 2021 +0000
@@ -0,0 +1,246 @@
+<tool id="megan_blast2rma" name="MEGAN: Generate RMA files" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>from BLAST output</description>
+    <!-- The macros expanded throughout this wrapper are imported from macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="bio_tools"/>
+    <expand macro="requirements"/>
+    <!--
+        Command outline:
+          1. pick the read / BLAST datasets for the selected input type,
+          2. symlink them to names carrying the extension derived below,
+          3. run blast2rma,
+          4. for the 'paired' input type, move the temporary output into place.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+#set input_mode = str($input_type_cond.input_type)
+
+#if $input_mode in ['single', 'pair']:
+    #set read1 = $input_type_cond.read1
+#else:
+    ## Processing paired reads is tricky if we're
+    ## downstream from MALT.  MALT doesn't have a
+    ## paired-read mode, so it won't attempt to analyze
+    ## reads in pairs.  To do paired read processing,
+    ## set MALT to generate SAM files and then import the
+    ## SAM files into MEGAN, specifying paired read mode
+    ## there. If you have multiple SAM files for the same
+    ## sample, then import them all at the same time to
+    ## create one unified rma6 file.
+
+    #set read1 = $input_type_cond.reads_collection['forward']
+#end if
+## The first BLAST file is read from the same parameter for every input type.
+#set blast1 = $input_type_cond.blast1
+
+## Derive the link extension for the read files (.fasta/.fastq, plus .gz).
+#if $read1.is_of_type('fasta', 'fasta.gz'):
+    #set read_ext = '.fasta'
+#else:
+    #set read_ext = '.fastq'
+#end if
+#if $read1.ext.endswith('.gz'):
+    #set read_ext = $read_ext + '.gz'
+#end if
+
+## Map the Galaxy datatype to the value passed to the format option.
+## The most specific datatypes are tested first: sam, blastxml and
+## tabular are all text-based, so testing 'txt' before them would
+## classify every one of them as BlastText.
+#if $blast1.is_of_type('daa'):
+    #set blast_format = 'DAA'
+#else if $blast1.is_of_type('sam'):
+    #set blast_format = 'SAM'
+#else if $blast1.is_of_type('blastxml'):
+    #set blast_format = 'BlastXML'
+#else if $blast1.is_of_type('tabular'):
+    #set blast_format = 'BlastTab'
+#else if $blast1.is_of_type('txt'):
+    #set blast_format = 'BlastText'
+#end if
+#set blast_ext = '.' + $blast_format
+#if $blast1.ext.endswith('.gz'):
+    #set blast_ext = $blast_ext + '.gz'
+#end if
+
+## Link the inputs to names that carry the extensions derived above.
+#set read1_identifier = 'read1' + $read_ext
+ln -s '${read1}' '${read1_identifier}' &&
+
+#set blast1_identifier = 'blast1' + $blast_ext
+ln -s '${blast1}' '${blast1_identifier}' &&
+
+#if $input_mode in ['pair', 'paired']:
+    #if $input_mode == 'pair':
+        #set read2 = $input_type_cond.read2
+    #else:
+        #set read2 = $input_type_cond.reads_collection['reverse']
+    #end if
+    ## The second BLAST file is read from the same parameter for both paired input types.
+    #set blast2 = $input_type_cond.blast2
+    #set read2_identifier = 'read2' + $read_ext
+    ln -s '${read2}' '${read2_identifier}' &&
+    #set blast2_identifier = 'blast2' + $blast_ext
+    ln -s '${blast2}' '${blast2_identifier}' &&
+#end if
+
+blast2rma
+#if $input_mode == 'single':
+    --in '${blast1_identifier}'
+    --reads '${read1_identifier}'
+    --out '${rma6_output}'
+#else if $input_mode == 'pair':
+    --in '${blast1_identifier}' '${blast2_identifier}'
+    --reads '${read1_identifier}' '${read2_identifier}'
+    --paired
+    --pairedSuffixLength $input_type_cond.pairedSuffixLength
+    --out '${rma6_output}'
+#else if $input_mode == 'paired':
+    --in '${blast1_identifier}' '${blast2_identifier}'
+    --reads '${read1_identifier}' '${read2_identifier}'
+    --paired
+    --pairedSuffixLength $input_type_cond.pairedSuffixLength
+    ## Strangely, megan requires an output
+    ## directory when processing paired reads
+    ## even though it produces a single file.
+    ## We'll accommodate this by prepending ./
+    ## to a temporary output file and then move
+    ## it later.
+    --out './tmp.rma6'
+#end if
+--format '${blast_format}'
+--blastMode '${blastMode}'
+--threads \${GALAXY_SLOTS:-8}
+--useCompression false
+$advanced_options.longReads
+--maxMatchesPerRead '$advanced_options.maxMatchesPerRead'
+$advanced_options.classify
+--minScore $advanced_options.minScore
+--maxExpected $advanced_options.maxExpected
+--minPercentIdentity $advanced_options.minPercentIdentity
+--topPercent $advanced_options.topPercent
+--minSupportPercent $advanced_options.minSupportPercent
+--minSupport $advanced_options.minSupport
+--minPercentReadCover $advanced_options.minPercentReadCover
+--minPercentReferenceCover $advanced_options.minPercentReferenceCover
+--minReadLength $advanced_options.minReadLength
+--lcaAlgorithm '$advanced_options.lcaAlgorithm'
+--lcaCoveragePercent $advanced_options.lcaCoveragePercent
+--readAssignmentMode '$advanced_options.readAssignmentMode'
+## use_con_file is the yes/no selector; conFile is the dataset itself.
+#if str($advanced_options.con_file_cond.use_con_file) == 'yes':
+    --conFile '$advanced_options.con_file_cond.conFile'
+#end if
+#if $input_mode == 'paired':
+    && mv './tmp.rma6' '$rma6_output'
+#end if
+]]></command>
+    <inputs>
+        <expand macro="input_type_cond"/>
+        <param argument="--blastMode" type="select" label="Blast mode">
+            <expand macro="blast_mode_options"/>
+        </param>
+        <section name="advanced_options" title="Advanced options" expanded="false">
+            <param argument="--longReads" type="boolean" truevalue="--longReads" falsevalue="" checked="false" label="Parse and analyse input reads as long reads?"/>
+            <param argument="--maxMatchesPerRead" type="integer" value="100" label="Maximum matches per read"/>
+            <param argument="--classify" type="boolean" truevalue="--classify" falsevalue="" checked="true" label="Run classification algorithm?"/>
+            <expand macro="common_blast_params"/>
+            <param argument="--minSupportPercent" type="float" value="0.05" min="0.0" max="100.0" label="Minimum support as percent of assigned reads" help="0 value ignores"/>
+            <param argument="--minSupport" type="integer" value="0" label="Minimum support" help="0 value ignores"/>
+            <param argument="--minPercentReadCover" type="float" value="0.0" min="0.0" max="100.0" label="Minimum percent of read length to be covered by alignments"/>
+            <param argument="--minPercentReferenceCover" type="float" value="0.0" min="0.0" max="100.0" label="Minimum percent of reference length to be covered by alignments"/>
+            <param argument="--minReadLength" type="integer" value="0" label="Minimum read length"/>
+            <param argument="--lcaAlgorithm" type="select" label="Select the LCA algorithm to use for taxonomic assignment">
+                <option value="naive" selected="true">naive</option>
+                <option value="weighted">weighted</option>
+                <option value="longReads">longReads</option>
+            </param>
+            <param argument="--lcaCoveragePercent" type="float" value="100.0" min="0.0" max="100.0" label="Percent for the LCA to cover"/>
+            <param argument="--readAssignmentMode" type="select" label="Select the read assignment mode">
+                <option value="alignedBases" selected="true">alignedBases</option>
+                <option value="readCount">readCount</option>
+            </param>
+            <!--
+                The selector needs a name of its own: the command template reads both
+                the yes/no choice and the dataset from this conditional, so the two
+                parameters must not share the name conFile.
+            -->
+            <conditional name="con_file_cond">
+                <param name="use_con_file" type="select" label="Process a file of contaminant taxa" help="One id or name per line">
+                    <option value="no" selected="true">No</option>
+                    <option value="yes">Yes</option>
+                </param>
+                <when value="no"/>
+                <when value="yes">
+                    <param argument="--conFile" type="data" format="txt" label="File of contaminant taxa"/>
+                </when>
+            </conditional>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="rma6_output" format="rma6"/>
+    </outputs>
+    <tests>
+        <!-- Single dataset input -->
+        <test expect_num_outputs="1">
+            <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="blast1" value="blast_R1.txt" ftype="txt"/>
+            <param name="blastMode" value="BlastN"/>
+            <output name="rma6_output" ftype="rma6">
+                <assert_contents>
+                    <has_size value="19596"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Single dataset input, contaminants file -->
+        <test expect_num_outputs="1">
+            <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="blast1" value="blast_R1.txt" ftype="txt"/>
+            <param name="blastMode" value="BlastN"/>
+            <param name="use_con_file" value="yes"/>
+            <param name="conFile" value="contaminants.txt" ftype="txt"/>
+            <output name="rma6_output" ftype="rma6">
+                <assert_contents>
+                    <!-- NOTE(review): this size was recorded while the selector and the dataset
+                         shared the name conFile; confirm it against a run with the contaminant
+                         file applied and re-record if it differs. -->
+                    <has_size value="19596"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Dataset pair input -->
+        <test expect_num_outputs="1">
+            <param name="input_type" value="pair"/>
+            <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="blast1" value="blast_R1.txt" ftype="txt"/>
+            <param name="blast2" value="blast_R2.txt" ftype="txt"/>
+            <param name="blastMode" value="BlastN"/>
+            <output name="rma6_output" ftype="rma6">
+                <assert_contents>
+                    <has_size value="39887"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Paired collection input -->
+        <test expect_num_outputs="1">
+            <param name="input_type" value="paired"/>
+            <param name="reads_collection">
+                <collection type="paired">
+                    <element name="forward" value="13-1941-6_S4_L001_R1_600000.fastq.gz"/>
+                    <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz"/>
+                </collection>
+            </param>
+            <param name="blast1" value="blast_R1.txt" ftype="txt"/>
+            <param name="blast2" value="blast_R2.txt" ftype="txt"/>
+            <param name="blastMode" value="BlastN"/>
+            <output name="rma6_output" ftype="rma6">
+                <assert_contents>
+                    <has_size value="39806"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Computes MEGAN RMA files from BLAST (or similar) files.  Inputs consist of reads in fasta or fastqsanger format (gzip compression
+is supported) and associated Blast files.  Each read file should have been used previously as the Blast input to produce the
+associated Blast file for this tool.
+
+This wrapper supports the following formats for the input Blast file.  The SAM, Tabular and Text formats can be produced by
+the Galaxy MALT Analyzer tool.  When these formats are used, this tool will apply the SAM, BlastTab and BlastText format options
+required by MEGAN.
+
+ * **DIAMOND Alignment Archive (DAA)** - a binary alignment format produced by the DIAMOND aligner
+ * **BlastXML** - XML output from Blast
+ * **Sequence Alignment/Map (SAM)** - a tab-delimited text format consisting of a header section, which is optional, and an alignment section
+ * **Tabular** - information presented in the form of a table with rows and columns
+ * **Text** - plain text format
+
+This tool outputs a Read-Match Archive (RMA) file.  MEGAN uses an update of the original RMA file format known as RMA6.  This update
+requires less disk space for files.
+    </help>
+    <citations>
+        <!-- A doi citation takes the bare DOI rather than a resolver URL. -->
+        <citation type="doi">10.1101/050559</citation>
+    </citations>
+</tool>
+