changeset 3:a9edbe21bf47 draft

"planemo upload for repository https://git.ufz.de/lehmanju/rnaquast commit 5ba8cddaafd411e30baa19da0f93959ef5ccaca0"
author iuc
date Fri, 14 Jan 2022 18:42:15 +0000
parents 96f74538896e
children f9f2ad782d8f
files rna_quast.xml
diffstat 1 files changed, 78 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/rna_quast.xml	Tue Oct 19 11:02:19 2021 +0000
+++ b/rna_quast.xml	Fri Jan 14 18:42:15 2022 +0000
@@ -1,10 +1,11 @@
-<tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@">
-    <description>A Quality Assessment Tool for De Novo Transcriptome Assemblies</description>
+<tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description>A quality assessment tool for de novo transcriptome assemblies</description>
     <xrefs>
         <xref type="bio.tools">rnaQUAST</xref>
     </xrefs>
     <macros>
         <token name="@TOOL_VERSION@">2.2.1</token>
+        <token name="@VERSION_SUFFIX@">1</token>
         <xml name="element_matching_line" token_name="" token_expression="">
             <element name="@NAME@">
                 <assert_contents>
@@ -92,7 +93,7 @@
     </stdio>
     <command detect_errors="exit_code"><![CDATA[
     #import re
-    #for $i in $in_fasta
+    #for $i in $transcripts
         ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' &&
     #end for
     #if $r
@@ -109,7 +110,7 @@
     rnaQUAST.py
     --threads \${GALAXY_SLOTS:-1}
     --transcripts
-    #for $i in $in_fasta
+    #for $i in $transcripts
          '${re.sub('[^\w\-.]', '_', i.element_identifier)}'
     #end for
     $strand_specific
@@ -133,8 +134,10 @@
         --no_plots
     #end if
     $blat
-    $busco_lineage
-    ##GeneMarkS-T is not available in conda $gene_mark
+    #if $busco_option.busco == 'true'
+        --busco $busco_option.lineage
+    #end if
+    ##$gene_mark
     $meta
     --lower_threshold $lower_threshold
     --upper_threshold $upper_threshold
@@ -145,7 +148,7 @@
     ## move per outputs that are generated for each input (outputdir/ASSEMBLER_output)
     ## to a joint dir (details) to make them discoverable
     ## also remove "ASSEMBLER." prefixes from files (otherwise the test macros don't work)
-    #for $i in $in_fasta
+    #for $i in $transcripts
         #set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0]
         &&
         (for f in \$(find 'outputdir/'$basename'_output' -type f);
@@ -161,9 +164,10 @@
     && true
     ]]>    </command>
     <inputs>
-        <param name="in_fasta" type="data" format="fasta" multiple="true" label="Chromosomes/scaffolds file" />
-        <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific" />
-        <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" />
+        <param argument="--transcripts" type="data" format="fasta" multiple="true" label="Transcripts" help="File(s) with transcripts in FASTA format."/>
+        <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific" 
+            help="Set if transcripts were assembled using strand-specific RNA-Seq data in order to benefit from knowing whether the transcript originated from the + or - strand"/>
+        <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" help="File with reference genome containing all chromosomes/scaffolds in FASTA format." />
         <conditional name="gene_coordinates">
             <param name="use_gtf" type="select" label="Use file with gene coordinates in GTF/GFF format?" help="We recommend to use files downloaded from GENCODE or Ensembl.">
                 <option value="true" selected="true">Yes</option>
@@ -171,20 +175,37 @@
             </param>
             <when value="true">
                 <param name="gtf" argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file" />
-                <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label=" GTF file contains genes records?" />
-                <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="GTF file contains transcripts records?" />
+                <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label="GTF file contains genes records?" 
+                    help="Use this option if your GTF file already contains genes records, otherwise gffutils will fix it. Note that gffutils may work for quite a long time"/>
+                <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="GTF file contains transcripts records?" help="Use this option if your GTF file already contains transcripts records, otherwise gffutils will fix it."/>
             </when>
             <when value="false">
             </when>
         </conditional>
-        <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Is genome prokararyotic?" />
-        <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used" />
-        <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" />
-        <param argument="--busco_lineage" type="boolean" truevalue="--busco_lineage" falsevalue="" checked="false" label="Run BUSCO tool?" help="The BUSCO tool detects core genes in the assembly. Use this option to provide path to the BUSCO lineage data (Eukaryota, Metazoa, Arthropoda, Vertebrata or Fungi)." />
-        <!-- GeneMarkS-T is not available in conda <param argument="\-\-gene_mark" type="boolean" truevalue="\-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?"/>-->
-        <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality asessment for Meta Transcriptome" />
-        <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x_assembled/covered/matched metrics." />
-        <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x_assembled/covered/matched metrics." />
+        <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Is genome prokaryotic?" help="Use this option if the genome is prokaryotic."/>
+        <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used" help="Default value is 50"/>
+        <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" help="Blat is especially useful for aligning long sequences and gapped mapping, which cannot be performed properly by other fast sequence mappers designed for short reads. " />
+        <conditional name="busco_option">
+          <param argument="--busco" type="select" label="Run BUSCO tool?" help="BUSCO allows to detect core genes in the assembled transcripts">
+              <option value="false">Disabled</option>
+              <option value="true">Enabled</option>
+          </param>
+          <when value="false"/>
+          <when value="true">
+            <param name="lineage" type="select" label="Lineage" help="Select a lineage for using BUSCO">
+                <option value="metazoa">Metazoa</option>
+                <option value="eukaryota">Eukaryota</option>
+                <option value="arthropoda">Arthropoda</option>
+                <option value="vertebrata">Vertebrata</option>
+                <option value="fungi">Fungi</option>
+                <option value="bacteria">Bacteria</option>
+            </param>
+          </when>
+        </conditional>
+        <!--param argument="-\-gene_mark" type="boolean" truevalue="-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?" help="GeneMarkS-T allows to predict genes in the assembled transcripts without reference genome"/-->
+        <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality assessment for meta-transcriptome assemblies" />
+        <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x-assembled/covered/matched metrics." />
+        <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x-assembled/covered/matched metrics." />
         <param name="out_sr" type="select" multiple="true" label="Short report formats">
             <option value="tsv" selected="true">tabular</option>
             <option value="txt">txt</option>
@@ -218,14 +239,14 @@
             <filter>"logs" in out_add</filter>
         </collection>
         <!-- note the output filter of the next two outputs checks if there is
-             more than 1 input for in_fasta (for 1 its a HDA, for more list or HDAs) -->
+             more than 1 input for transcripts (for 1 it's an HDA, for more a list of HDAs) -->
         <collection name="comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots">
             <discover_datasets ext="png" pattern="(?P&lt;name&gt;.+)\.png" directory="outputdir/comparison_output/" visible="false" recurse="true" />
-            <filter> isinstance(in_fasta, list) and "plots" in out_add</filter>
+            <filter> isinstance(transcripts, list) and "plots" in out_add</filter>
         </collection>
         <collection name="comparison" type="list" label="${tool.name} on ${on_string}: comparison">
             <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.txt" directory="outputdir/comparison_output/" visible="false" recurse="true" />
-            <filter> isinstance(in_fasta, list) and "comparison" in out_add</filter>
+            <filter> isinstance(transcripts, list) and "comparison" in out_add</filter>
         </collection>
         <collection name="details" type="list:list" label="${tool.name} on ${on_string}: detailed output">
             <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;txt)" directory="details/" visible="false" />
@@ -238,7 +259,7 @@
     </outputs>
     <tests>
         <test expect_num_outputs="7">
-            <param name="in_fasta" value="idba.fasta,Trinity.fasta" ftype="fasta" />
+            <param name="transcripts" value="idba.fasta,Trinity.fasta" ftype="fasta" />
             <param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta" />
             <conditional name="gene_coordinates">
                 <param name="use_gtf" value="true" />
@@ -260,7 +281,7 @@
             </output_collection>
         </test>
         <test expect_num_outputs="6">
-            <param name="in_fasta" value="Trinity.fasta" ftype="fasta" />
+            <param name="transcripts" value="Trinity.fasta" ftype="fasta" />
             <conditional name="gene_coordinates">
                 <param name="use_gtf" value="false" />
             </conditional>
@@ -285,6 +306,38 @@
                 </element>
             </output_collection>
         </test>
+        <test expect_num_outputs="6">
+            <param name="transcripts" value="Trinity.fasta" ftype="fasta" />
+            <conditional name="gene_coordinates">
+                <param name="use_gtf" value="false" />
+            </conditional>
+            <param name="min_alignment" value="30" />
+            <param name="lower_threshold" value="45" />
+            <param name="upper_threshold" value="95" />
+            <param name="out_sr" value="txt,tex,tsv,pdf" />
+            <param name="out_add" value="logs,details_plots" />
+            <conditional name="busco_option">
+                <param name="busco" value="true"/>
+                <param name="lineage" value="metazoa"/>
+            </conditional>
+            <expand macro="pdf_output_test" />
+            <expand macro="tex_output_test" />
+            <expand macro="tsv_output_test" />
+            <expand macro="txt_output_test" />
+            <output_collection name="list_logs" type="list">
+                <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text="" />
+                <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!" />
+            </output_collection>
+            <output_collection name="details_png" type="list:list" count="1">
+                <element name="Trinity">
+                    <expand macro="element_has_text" name="Nx" text="PNG" />
+                    <expand macro="element_has_text" name="transcript_length" text="PNG" />
+                </element>
+            </output_collection>
+            <assert_command>
+                <has_text text="--busco metazoa"/>
+            </assert_command>
+        </test>
     </tests>
     <help><![CDATA[
 **What is rnaQUAST**