view rna_quast.xml @ 3:bf3dc4cae5bf draft

Uploaded
author lehmanju
date Wed, 14 Oct 2020 07:03:06 +0000
parents 7e130d325fa7
children cc0366f0bdf7
line wrap: on
line source

<tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@">
    <description>A Quality Assessment Tool for De Novo Transcriptome Assemblies</description>
    <macros>
        <!-- Single source of truth for the wrapped rnaQUAST version; used for the
             tool version and for the package requirement. -->
        <token name="@TOOL_VERSION@">2.1.0</token>
        <!-- Test helper: asserts that the collection element @NAME@ contains a
             line matching the regular expression @EXPRESSION@. -->
        <xml name="element_matching_line" token_name="" token_expression="">
            <element name="@NAME@">
                <assert_contents><has_line_matching expression="@EXPRESSION@"/></assert_contents>
            </element>
        </xml>
        <!-- Test helper: asserts that the collection element @NAME@ contains the
             literal text @TEXT@ (the token declared by token_text). -->
        <xml name="element_has_text" token_name="" token_text="">
            <element name="@NAME@">
                <assert_contents><has_text text="@TEXT@"/></assert_contents>
            </element>
        </xml>
    </macros>
    <!-- Runtime dependency: the "rnaquast" package, pinned to the version held
         in the @TOOL_VERSION@ token. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">rnaquast</requirement>
    </requirements>
    <!-- Treat the text "Traceback " on either stdout or stderr as a fatal
         error and report it as "rnaQuast failed". -->
    <stdio>
        <regex match="Traceback " source="both" level="fatal" description="rnaQuast failed" />
    </stdio>
    <command detect_errors="exit_code"><![CDATA[
    ## Overview:
    ##   1. link all inputs into the working directory under sanitised names,
    ##   2. run rnaQUAST.py writing to outputdir/,
    ##   3. flatten each per-assembly result folder into details/ so that the
    ##      nested collections can be discovered by file name.
    #import re
    ## Sanitised name = element identifier with every character other than
    ## word characters, "-" and "." replaced by "_".
    #for $i in $input
        ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' &&
    #end for
    ## Optional reference genome file(s).
    #if $r
        #for $rf in $r
            ln -s '$rf' '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' &&
        #end for
    #end if
    ## Optional gene coordinate file(s).
    #if $gene_coordinates.use_gtf == "true"
        #for $g in $gene_coordinates.gtf
            ln -s '$g' '${re.sub('[^\w\-.]', '_', g.element_identifier)}' &&
        #end for
    #end if
    mkdir outputdir &&
    rnaQUAST.py
    --threads \${GALAXY_SLOTS:-1}
    --transcripts
    #for $i in $input
         '${re.sub('[^\w\-.]', '_', i.element_identifier)}'
    #end for
    $strand_specific
    #if $r
        -r
        #for $rf in $r
            '${re.sub('[^\w\-.]', '_', rf.element_identifier)}'
        #end for
    #end if
    #if $gene_coordinates.use_gtf == "true"
        --gtf
        #for $g in $gene_coordinates.gtf
            '${re.sub('[^\w\-.]', '_', g.element_identifier)}'
        #end for
        $gene_coordinates.disable_infer_genes
        $gene_coordinates.disable_infer_transcripts
    #end if
    $prokaryote
    --min_alignment '$min_alignment'
    ## Skip plotting only when neither the PDF report nor any plot output
    ## (comparison plots or per-assembly detail plots) was requested.
    #if "pdf" not in $out_sr and "plots" not in $out_add and "details_plots" not in $out_add
        --no_plots
    #end if
    $blat
    $busco_lineage
    $gene_mark
    --lower_threshold $lower_threshold
    --upper_threshold $upper_threshold
    -o outputdir
     && mkdir details 
     #for $i in $input
        ## Sanitised input name without its last extension; the per-assembly
        ## results are expected in outputdir/<basename>_output.
        #set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0]
        &&
        ## Move every file to details/<basename>_____<file name>. The full
        ## basename is used as prefix (not just the part before the first "_")
        ## so assemblies whose names contain underscores stay distinct.
        (for f in \$(find 'outputdir/${basename}_output' -type f);
        do
            mv "\$f" 'details/${basename}_____'"\$(basename "\$f")";
        done)
    #end for
    ## rename .list files to .txt files to make them detectable (format detection by extension)
    ## the final `true` seems needed since otherwise the `;` at the end is swallowed
    && find details/ -name "*.list" -exec mv {} {}.txt \;
    && true
    ]]></command>
    <inputs>
        <!-- Assemblies to evaluate; passed to rnaQUAST via the transcripts option. -->
        <param name="input" type="data" format="fasta" multiple="true" label="Chromosomes/scaffolds file"/>
        <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific"/>
        <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" />
        <!-- The GTF/GFF file and the two "disable_infer" switches are only
             offered, and only added to the command line, when use_gtf is "true". -->
        <conditional name="gene_coordinates">
            <param name="use_gtf" type="select" label="Use file with gene coordinates in GTF/GFF format?" help="We recommend to use files downloaded from GENCODE or Ensembl.">
                <option value="true" selected="true">Yes</option>
                <option value="false">No</option>
            </param>
            <when value="true">
                <param name="gtf" argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file"/>
                <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label="GTF file contains genes records?"/>
                <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="GTF file contains transcripts records?"/>
            </when>
            <when value="false">
            </when>
        </conditional>
        <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Is genome prokaryotic?"/>
        <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used"/>
        <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" />
        <!-- NOTE(review): the help text below says this option provides a path
             to the BUSCO lineage data, but the wrapper emits the bare flag
             without a value; confirm against the rnaQUAST manual. -->
        <param argument="--busco_lineage" type="boolean" truevalue="--busco_lineage" falsevalue="" checked="false" label="Run BUSCO tool?" help="The BUSCO tool detects core genes in the assembly. Use this option to provide path to the BUSCO lineage data (Eukaryota, Metazoa, Arthropoda, Vertebrata or Fungi)."/>
        <param argument="--gene_mark" type="boolean" truevalue="--gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?"/>
        <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x_assembled/covered/matched metrics."/>
        <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x_assembled/covered/matched metrics."/>
        <!-- Output selectors: they are evaluated by the output filters and by
             the plot-skipping condition in the command; they are not passed to
             rnaQUAST as values. -->
        <param name="out_sr" type="select" multiple="true" label="Short report formats">
            <option value="tsv" selected="true">tabular</option>
            <option value="txt">txt</option>
            <option value="tex">tex</option>
            <option value="pdf" selected="true">pdf</option>
        </param>
        <param name="out_add" type="select" multiple="true" label="Additional outputs">
            <option value="logs">Logs</option>
            <option value="plots" selected="true">Plots (only for n>1)</option>
            <option value="comparison" selected="true">Comparison for Chromosomes/scaffolds files (only for n>1)</option>
            <option value="details" selected="true">Details per Chromosomes/scaffolds file</option>
            <option value="details_plots" selected="true">Details per Chromosomes/scaffolds file as plot</option>
        </param>
    </inputs>

    <outputs>
        <!-- Short summary reports: fixed file names taken from outputdir/; each
             one is only created when its format is selected in out_sr. -->
        <data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: pdf report" from_work_dir="outputdir/short_report.pdf">
            <filter>"pdf" in out_sr</filter>
        </data>
        <data name="short_report_txt" format="txt" label="${tool.name} on ${on_string}: txt report" from_work_dir="outputdir/short_report.txt">
            <filter>"txt" in out_sr</filter>
        </data>
        <data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: tex report" from_work_dir="outputdir/short_report.tex">
            <filter>"tex" in out_sr</filter>
        </data>
        <data name="short_report_tsv" format="tabular" label="${tool.name} on ${on_string}: tsv report" from_work_dir="outputdir/short_report.tsv">
            <filter>"tsv" in out_sr</filter>
        </data>
        <!-- Log files discovered in outputdir/logs/; the element name is the
             file name without the ".log" suffix. -->
        <collection name="list_logs" type="list" label="${tool.name} on ${on_string}: logs" >
            <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.log"  directory="outputdir/logs/" visible="false" />
            <filter>"logs" in out_add</filter>
        </collection>
        <!-- Cross-assembly comparison results, searched recursively below
             outputdir/comparison_output/. Both collections are filtered on
             more than one input plus the matching out_add option. -->
        <collection name="comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots" >
            <discover_datasets ext="png" pattern="(?P&lt;name&gt;.+)\.png"  directory="outputdir/comparison_output/" visible="false" recurse="true"/>
            <filter> len(input)>1 and "plots" in out_add</filter>
        </collection>
        <collection name="comparison" type="list" label="${tool.name} on ${on_string}: comparison" >
            <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.txt"  directory="outputdir/comparison_output/" visible="false" recurse="true" />
            <filter> len(input)>1 and "comparison" in out_add</filter>
        </collection>
        <!-- Per-assembly results collected from details/, where the command
             stores files as <prefix>_____<file name>: the part before the
             five underscores becomes identifier_0, the part after it
             identifier_1, and the extension selects txt or png. -->
        <collection name="details" type="list:list" label="${tool.name} on ${on_string}: detailed output">
            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;txt)" directory="details/" visible="false"/>
            <filter>"details" in out_add</filter>
        </collection>
        <collection name="details_png" type="list:list" label="${tool.name} on ${on_string}: detailed output plots">
            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;png)" directory="details/" visible="false"/>
            <filter>"details_plots" in out_add</filter>
        </collection>
    </outputs>
    <tests>
        <!-- Two assemblies with reference genome and GTF; requests the txt/tex/tsv
             reports plus logs, comparison, comparison plots and details. -->
        <test expect_num_outputs="7">
            <param name="input" value="idba.fasta,Trinity.fasta" ftype="fasta" />
            <param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta" />
            <conditional name="gene_coordinates">
                <param name="use_gtf" value="true" />
                <param name="gtf" value="Saccharomyces_cerevisiae.R64-1-1.75.gtf" ftype="gtf" />
                <param name="disable_infer_genes" value="true"/>
                <param name="disable_infer_transcripts" value="true"/>
            </conditional>
            <param name="out_sr" value="txt,tex,tsv" />
            <param name="out_add" value="logs,comparison,plots,details" />
            <output name="short_report_txt">
                <assert_contents>
                    <has_text text="SHORT SUMMARY REPORT"/>
                </assert_contents>
            </output>
            <output name="short_report_tex">
                <assert_contents>
                    <has_text text="Short summary report"/>
                    <has_text text="end{document}"/>
                </assert_contents>
            </output>
            <output name="short_report_tsv">
                <assert_contents>
                    <has_line_matching expression="^METRICS/TRANSCRIPTS\tidba\tTrinity$"/>
                </assert_contents>
            </output>
            <output_collection name="comparison_png" type="list" count="15"/>
            <output_collection name="comparison" type="list" count="19"/>
            <output_collection name="list_logs" type="list" count="8"/>
            <output_collection name="details" type="list:list" count="2">
                <output_collection name="Trinity" type="list" count="21"/>
                <output_collection name="idba" type="list" count="21"/>
            </output_collection>
        </test>
        <!-- Single assembly without reference genome or gene coordinates, with
             non-default alignment/threshold values; requests all four short
             reports plus logs and per-assembly detail plots. -->
        <test expect_num_outputs="8">
            <param name="input" value="Trinity.fasta" ftype="fasta" />
            <conditional name="gene_coordinates">
                <param name="use_gtf" value="false" />
            </conditional>
            <param name="min_alignment" value="30" />
            <param name="lower_threshold" value="45" />
            <param name="upper_threshold" value="95"/>
            <param name="out_sr" value="txt,tex,tsv,pdf" />
            <param name="out_add" value="logs,details_plots" />
            <output name="short_report_pdf" file="short_report.pdf" compare="sim_size"/>
            <output name="short_report_txt" file="short_report.txt" compare="sim_size"/>
            <output name="short_report_tex" file="short_report.tex" compare="sim_size"/>
            <output name="short_report_tsv" file="short_report.tsv" compare="sim_size"/>
            <output_collection name="list_logs" type="list">
                <element name="rnaQUAST" file="rnaQUAST"/>
                <element name="Trinity.GeneMarkS_T.err" file="spades.311.GeneMarkS_T.err"/>
            </output_collection>
            <output_collection name="details_png" type="list:list" count="1">
                <output_collection name="Trinity" type="list" count="11"/>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
**What is rnaQUAST**

- a quality assessment tool for de novo transcriptome assemblies
- evaluates RNA-Seq assembly quality and benchmarks transcriptome assemblers using a reference genome and gene database
- calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts
    
**Using rnaQUAST without reference** you won't get:
  
- x-assembled (Exons) 
- Alignments per Isoform 
- x-covered (Exons)
- x-matched (Blocks)
- gmap build logs
    
**Using rnaQUAST with reference** you will get:

- Reports
- Logs
- Alignment/Basic Metrics
- Misassemblies/Specificity/Sensitivity
- Alignment multiplicity
- Block/Transcript length
- Blocks per alignment
- Mismatch rate
- x-aligned
- Nx
- Blocks per alignment
- gmap build logs
    
**Using rnaQUAST without gene coordinates** you won't get:

- x-assembled (Exons)
- Alignments per Isoform
- x-covered (Exons)
- x-matched (Blocks)
- gmap build logs
- Database Metrics
- Alignment multiplicity
- Mismatch rate
- NAx
- x-aligned

**Using rnaQUAST with gene coordinates** you will get:

- Reports
- Logs
- Alignment/Basic Metrics
- Misassemblies/Specificity/Sensitivity
- Alignment multiplicity
- Block/Transcript length
- Blocks per alignment
- Mismatch rate
- x-aligned
- Nx/NAx
- gmap build logs
- Database Metrics
- Alignment multiplicity

For more information, see the citations.
    ]]></help>
    <!-- Publication describing rnaQUAST; the DOI is given without surrounding
         whitespace so it can be resolved as-is. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btw218</citation>
    </citations>
</tool>