Mercurial > repos > iuc > rnaquast

diff rna_quast.xml @ 0:33c060ec0ac9 draft
"planemo upload for repository https://git.ufz.de/lehmanju/rnaquast commit 89fd73a81e54e9f5722b0a83ee00dc47ab0cb1e3"
author: iuc
date: Mon, 19 Oct 2020 08:04:52 +0000
children: e989670c7fc7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rna_quast.xml	Mon Oct 19 08:04:52 2020 +0000
@@ -0,0 +1,332 @@
+<tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@">
+    <description>A Quality Assessment Tool for De Novo Transcriptome Assemblies</description>
+    <macros>
+        <token name="@TOOL_VERSION@">2.1.0</token>
+        <xml name="element_matching_line" token_name="" token_expression="">
+            <element name="@NAME@">
+                <assert_contents><has_line_matching expression="@EXPRESSION@"/></assert_contents>
+            </element>
+        </xml>
+        <xml name="element_has_text" token_name="" token_text="">
+            <element name="@NAME@">
+                <assert_contents><has_text text="@TEXT@"/></assert_contents>
+            </element>
+        </xml>
+
+        <xml name="details_output_test" token_assembler="">
+            <element name="@ASSEMBLER@">
+                <element name="5000%-assembled.list"><assert_contents><has_n_lines n="0"/></assert_contents></element>
+                <element name="9500%-assembled.list"><assert_contents><has_n_lines n="0"/></assert_contents></element>
+                <expand macro="element_matching_line" name="alignment_metrics" expression="\s*== ALIGNMENT METRICS \(calculated with reference genome but without gene database\) ==\s*"/>
+                <expand macro="element_matching_line" name="alignment_multiplicity" expression="unaligned=\d+ aligned=\d+ alignments=\d+\s*"/>
+                <expand macro="element_matching_line" name="alignments_per_isoform" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="basic_metrics" expression="\s*== BASIC TRANSCRIPTS METRICS \(calculated without reference genome and gene database\) ==\s*"/>
+                <expand macro="element_matching_line" name="block_length" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="blocks_per_alignment" expression="avg=[\d.]+\s+tot=\d+\s*"/>
+                <expand macro="element_matching_line" name="database_metrics" expression="\s*== GENE DATABASE METRICS ==\s*"/>
+                <expand macro="element_matching_line" name="misassemblies" expression="\s*== ALIGNMENT METRICS FOR MISASSEMBLED \(CHIMERIC\) TRANSCRIPTS \(calculated with reference genome or with gene database\) ==\s*"/>
+                <expand macro="element_matching_line" name="mismatch_rate" expression="avg=[\d.]+\s+tot=\d+\s*"/>
+                <expand macro="element_matching_line" name="sensitivity" expression="\s*== ASSEMBLY COMPLETENESS \(SENSITIVITY\) ==\s*"/>
+                <expand macro="element_matching_line" name="specificity" expression="\s*== ASSEMBLY SPECIFICITY ==\s*"/>
+                <expand macro="element_matching_line" name="transcript_length" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="x-aligned" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="x-assembled" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="x-assembled_exons" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="x-covered" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="x-covered_exons" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="x-matched" expression="avg=[\d.]+\s*"/>
+                <expand macro="element_matching_line" name="x-matched_blocks" expression="avg=[\d.]+\s*"/>
+            </element>
+        </xml>
+
+        <xml name="txt_output_test" token_assembler="">
+            <output name="short_report_txt">
+                <assert_contents>
+                    <has_text text="SHORT SUMMARY REPORT"/>
+                </assert_contents>
+            </output>
+        </xml>
+        <xml name="tex_output_test" token_assembler="">
+            <output name="short_report_tex">
+                <assert_contents>
+                    <has_text text="Short summary report"/>
+                    <has_text text="end{document}"/>
+                </assert_contents>
+            </output>
+        </xml>
+        <xml name="tsv_output_test" token_assembler="">
+            <output name="short_report_tsv">
+                <assert_contents>
+                    <has_line_matching expression="^METRICS/TRANSCRIPTS\t.+$"/>
+                </assert_contents>
+            </output>
+        </xml>
+        <xml name="pdf_output_test" token_assembler="">
+            <output name="short_report_pdf">
+                <assert_contents>
+                    <has_text text="rnaQUAST short report"/>
+                </assert_contents>
+            </output>
+        </xml>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">rnaquast</requirement>
+    </requirements>
+    <stdio>
+        <regex match="Traceback " source="both" level="fatal" description="rnaQuast failed" />
+    </stdio>
+    <command detect_errors="exit_code"><![CDATA[
+    #import re
+    #for $i in $in_fasta
+        ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' &&
+    #end for
+    #if $r
+        #for $rf in $r
+            ln -s '$rf' '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' &&
+        #end for
+    #end if
+    #if $gene_coordinates.use_gtf == "true"
+        #for $g in $gene_coordinates.gtf
+            ln -s '$g' '${re.sub('[^\w\-.]', '_', g.element_identifier)}' &&
+        #end for
+    #end if
+    mkdir outputdir &&
+    rnaQUAST.py
+    --threads \${GALAXY_SLOTS:-1}
+    --transcripts
+    #for $i in $in_fasta
+         '${re.sub('[^\w\-.]', '_', i.element_identifier)}'
+    #end for
+    $strand_specific
+    #if $r
+        -r
+        #for $rf in $r
+            '${re.sub('[^\w\-.]', '_', rf.element_identifier)}'
+        #end for
+    #end if
+    #if $gene_coordinates.use_gtf == "true"
+        --gtf
+        #for $g in $gene_coordinates.gtf
+            '${re.sub('[^\w\-.]', '_', g.element_identifier)}'
+        #end for
+        $gene_coordinates.disable_infer_genes
+        $gene_coordinates.disable_infer_transcripts
+    #end if
+    $prokaryote
+    --min_alignment '$min_alignment'
+    #if "pdf" not in $out_sr and "plots" not in $out_add
+        --no_plots
+    #end if
+    $blat
+    $busco_lineage
+    ##GeneMarkS-T is not available in conda $gene_mark
+    $meta
+    --lower_threshold $lower_threshold
+    --upper_threshold $upper_threshold
+    -o outputdir
+
+    && mkdir details
+
+    ## move per outputs that are generated for each input (outputdir/ASSEMBLER_output)
+    ## to a joint dir (details) to make them discoverable
+    ## also remove "ASSEMBLER." prefixes from files (otherwise the test macros don't work)
+    #for $i in $in_fasta
+        #set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0]
+        &&
+        (for f in \$(find 'outputdir/'$basename'_output' -type f);
+        do
+            d=\$(dirname \$f | cut -d"/" -f2 | cut -d'_' -f1) &&
+            mv \$f details/"\$d"_____"\$(basename \$f | sed 's/$basename\.//')";
+        done)
+    #end for
+
+    ## rename .list files to .txt files to make them detectable (format detection by extension)
+    ## the final `true` seems needed since otherwise the `;` at the end is swallowed
+    && find details/ -name "*.list" -exec mv {} {}.txt \;
+    && true
+    ]]></command>
+    <inputs>
+        <param name="in_fasta" type="data" format="fasta" multiple="true" label="Chromosomes/scaffolds file"/>
+        <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific"/>
+        <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" />
+        <conditional name="gene_coordinates">
+            <param name="use_gtf" type="select" label="Use file with gene coordinates in GTF/GFF format?" help="We recommend to use files downloaded from GENCODE or Ensembl.">
+                <option value="true" selected="true">Yes</option>
+                <option value="false">No</option>
+            </param>
+            <when value="true">
+                <param name="gtf" argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file"/>
+                <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label=" GTF file contains genes records?"/>
+                <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="GTF file contains transcripts records?"/>
+            </when>
+            <when value="false">
+            </when>
+        </conditional>
+        <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Is genome prokararyotic?"/>
+        <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used"/>
+        <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" />
+        <param argument="--busco_lineage" type="boolean" truevalue="--busco_lineage" falsevalue="" checked="false" label="Run BUSCO tool?" help="The BUSCO tool detects core genes in the assembly. Use this option to provide path to the BUSCO lineage data (Eukaryota, Metazoa, Arthropoda, Vertebrata or Fungi)."/>
+        <!-- GeneMarkS-T is not available in conda <param argument="\-\-gene_mark" type="boolean" truevalue="\-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?"/>-->
+        <param argument="--meta"  type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality asessment for Meta Transcriptome"/>
+        <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x_assembled/covered/matched metrics."/>
+        <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x_assembled/covered/matched metrics."/>
+        <param name="out_sr" type="select" multiple="true" label="Short report formats">
+            <option value="tsv" selected="true">tabular</option>
+            <option value="txt">txt</option>
+            <option value="tex">tex</option>
+            <option value="pdf" selected="true">pdf</option>
+        </param>
+        <param name="out_add" type="select" multiple="true" label="Additional outputs">
+            <option value="logs">Logs</option>
+            <option value="plots" selected="true">Plots (only for n>1)</option>
+            <option value="comparison" selected="true">Comparison for Chromosomes/scaffolds files (only for n>1)</option>
+            <option value="details" selected="true">Details per Chromosomes/scaffolds file</option>
+            <option value="details_plots" selected="true">Details per Chromosomes/scaffolds file as plot</option>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: pdf report" from_work_dir="outputdir/short_report.pdf">
+            <filter>"pdf" in out_sr</filter>
+        </data>
+        <data name="short_report_txt" format="txt" label="${tool.name} on ${on_string}: txt report" from_work_dir="outputdir/short_report.txt">
+            <filter>"txt" in out_sr</filter>
+        </data>
+        <data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: tex report" from_work_dir="outputdir/short_report.tex">
+            <filter>"tex" in out_sr</filter>
+        </data>
+        <data name="short_report_tsv" format="tabular" label="${tool.name} on ${on_string}: tsv report" from_work_dir="outputdir/short_report.tsv">
+            <filter>"tsv" in out_sr</filter>
+        </data>
+        <collection name="list_logs" type="list" label="${tool.name} on ${on_string}: logs" >
+            <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.log"  directory="outputdir/logs/" visible="false" />
+            <filter>"logs" in out_add</filter>
+        </collection>
+        <!-- note the output filter of the next two outputs checks if there is
+             more than 1 input for in_fasta (for 1 its a HDA, for more list or HDAs) -->
+        <collection name="comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots" >
+            <discover_datasets ext="png" pattern="(?P&lt;name&gt;.+)\.png"  directory="outputdir/comparison_output/" visible="false" recurse="true"/>
+            <filter> isinstance(in_fasta, list) and "plots" in out_add</filter>
+        </collection>
+        <collection name="comparison" type="list" label="${tool.name} on ${on_string}: comparison" >
+            <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.txt"  directory="outputdir/comparison_output/" visible="false" recurse="true" />
+            <filter> isinstance(in_fasta, list) and "comparison" in out_add</filter>
+        </collection>
+        <collection name="details" type="list:list" label="${tool.name} on ${on_string}: detailed output">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;txt)" directory="details/" visible="false"/>
+            <filter>"details" in out_add</filter>
+        </collection>
+        <collection name="details_png" type="list:list" label="${tool.name} on ${on_string}: detailed output plots">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;png)" directory="details/" visible="false"/>
+            <filter>"details_plots" in out_add</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="7">
+            <param name="in_fasta" value="idba.fasta,Trinity.fasta" ftype="fasta" />
+            <param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta" />
+            <conditional name="gene_coordinates">
+                <param name="use_gtf" value="true" />
+                <param name="gtf" value="Saccharomyces_cerevisiae.R64-1-1.75.gtf" ftype="gtf" />
+                <param name="disable_infer_genes" value="true"/>
+                <param name="disable_infer_transcripts" value="true"/>
+            </conditional>
+            <param name="out_sr" value="txt,tex,tsv" />
+            <param name="out_add" value="logs,comparison,plots,details" />
+            <expand macro="txt_output_test"/>
+            <expand macro="tex_output_test"/>
+            <expand macro="tsv_output_test"/>
+            <output_collection name="comparison_png" type="list" count="15"/>
+            <output_collection name="comparison" type="list" count="19"/>
+            <output_collection name="list_logs" type="list" count="8"/>
+            <output_collection name="details" type="list:list" count="2">
+                <expand macro="details_output_test" assembler="Trinity"/>
+                <expand macro="details_output_test" assembler="idba"/>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="6">
+            <param name="in_fasta" value="Trinity.fasta" ftype="fasta" />
+            <conditional name="gene_coordinates">
+                <param name="use_gtf" value="false" />
+            </conditional>
+            <param name="min_alignment" value="30" />
+            <param name="lower_threshold" value="45" />
+            <param name="upper_threshold" value="95"/>
+            <param name="out_sr" value="txt,tex,tsv,pdf" />
+            <param name="out_add" value="logs,details_plots" />
+
+            <expand macro="pdf_output_test"/>
+            <expand macro="tex_output_test"/>
+            <expand macro="tsv_output_test"/>
+            <expand macro="txt_output_test"/>
+            <output_collection name="list_logs" type="list">
+                <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text=""/>
+                <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!"/>
+            </output_collection>
+            <output_collection name="details_png" type="list:list" count="1">
+                <element name="Trinity">
+                    <expand macro="element_has_text" name="Nx" text="PNG"/>
+                    <expand macro="element_has_text" name="transcript_length" text="PNG"/>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What is rnaQUAST**
+- a quality assessment tool for de novo transcriptome assemblies
+- evaluating RNA-Seq assembly quality and benchmarking transcriptome assemblers using reference genome and gene database
+- calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts
+
+**Using rnaQuast without reference** you wont get:
+
+- x-assembled (Exons)
+- Alignments per Isoform
+- x-covered (Exons)
+- x-matched (Blocks)
+- gmap build logs
+
+**Using rnaQuast with reference** you will get:
+- Reports
+- Logs
+- Alignement/Basic Metrics
+- Misassemblies/ Specificity/ Sensitivity
+- Alignment multiplicity
+- Block/ Transcript Lentgh
+- Blocks per alignment
+- Mismatch rate
+- x-aligned
+- Nx
+- Blocks per alignment
+- gmap build logs
+
+**Using rnaQuast without gene coordinates** you wont get:
+- x-assembled (Exons)
+- Alignments per Isoform
+- x-covered (Exons)
+- x-matched (Blocks)
+- gmap build logs
+- Database Metrics
+- Alignment multiplicity
+- Mismatch rate
+- NAx
+- x-aligned
+**Using rnaQuast with gene coordinates** you will get:
+- Reports
+- Logs
+- Alignement/Basic Metrics
+- Misassemblies/Specificity/Sensitivity
+- Alignment multiplicity
+- Block/Transcript length
+- Blocks per alignment
+- Mismatch rate
+- x-aligned
+- Nx/NAx
+- gmap build logs
+- Database Metrics
+- Alignment multiplicity
+More informations, see citations.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btw218 </citation>
+    </citations>
+</tool>
author	iuc
date	Mon, 19 Oct 2020 08:04:52 +0000
parents
children	e989670c7fc7