Mercurial > repos > lehmanju > rnaquast
changeset 4:cc0366f0bdf7 draft
Uploaded
author | lehmanju |
---|---|
date | Fri, 16 Oct 2020 06:20:35 +0000 |
parents | bf3dc4cae5bf |
children | 3a150fca6d60 |
files | rna_quast.xml |
diffstat | 1 files changed, 99 insertions(+), 49 deletions(-) [+] |
line wrap: on
line diff
--- a/rna_quast.xml Wed Oct 14 07:03:06 2020 +0000 +++ b/rna_quast.xml Fri Oct 16 06:20:35 2020 +0000 @@ -9,9 +9,65 @@ </xml> <xml name="element_has_text" token_name="" token_text=""> <element name="@NAME@"> - <assert_contents><has_text text="@TEXXT@"/></assert_contents> + <assert_contents><has_text text="@TEXT@"/></assert_contents> + </element> + </xml> + + <xml name="details_output_test" token_assembler=""> + <element name="@ASSEMBLER@"> + <element name="5000%-assembled.list"><assert_contents><has_n_lines n="0"/></assert_contents></element> + <element name="9500%-assembled.list"><assert_contents><has_n_lines n="0"/></assert_contents></element> + <expand macro="element_matching_line" name="alignment_metrics" expression="\s*== ALIGNMENT METRICS \(calculated with reference genome but without gene database\) ==\s*"/> + <expand macro="element_matching_line" name="alignment_multiplicity" expression="unaligned=\d+ aligned=\d+ alignments=\d+\s*"/> + <expand macro="element_matching_line" name="alignments_per_isoform" expression="avg=[\d.]+\s*"/> + <expand macro="element_matching_line" name="basic_metrics" expression="\s*== BASIC TRANSCRIPTS METRICS \(calculated without reference genome and gene database\) ==\s*"/> + <expand macro="element_matching_line" name="block_length" expression="avg=[\d.]+\s*"/> + <expand macro="element_matching_line" name="blocks_per_alignment" expression="avg=[\d.]+\s+tot=\d+\s*"/> + <expand macro="element_matching_line" name="database_metrics" expression="\s*== GENE DATABASE METRICS ==\s*"/> + <expand macro="element_matching_line" name="misassemblies" expression="\s*== ALIGNMENT METRICS FOR MISASSEMBLED \(CHIMERIC\) TRANSCRIPTS \(calculated with reference genome or with gene database\) ==\s*"/> + <expand macro="element_matching_line" name="mismatch_rate" expression="avg=[\d.]+\s+tot=\d+\s*"/> + <expand macro="element_matching_line" name="sensitivity" expression="\s*== ASSEMBLY COMPLETENESS \(SENSITIVITY\) ==\s*"/> + <expand macro="element_matching_line" name="specificity" expression="\s*== ASSEMBLY SPECIFICITY ==\s*"/> + <expand macro="element_matching_line" name="transcript_length" expression="avg=[\d.]+\s*"/> + <expand macro="element_matching_line" name="x-aligned" expression="avg=[\d.]+\s*"/> + <expand macro="element_matching_line" name="x-assembled" expression="avg=[\d.]+\s*"/> + <expand macro="element_matching_line" name="x-assembled_exons" expression="avg=[\d.]+\s*"/> + <expand macro="element_matching_line" name="x-covered" expression="avg=[\d.]+\s*"/> + <expand macro="element_matching_line" name="x-covered_exons" expression="avg=[\d.]+\s*"/> + <expand macro="element_matching_line" name="x-matched" expression="avg=[\d.]+\s*"/> + <expand macro="element_matching_line" name="x-matched_blocks" expression="avg=[\d.]+\s*"/> </element> </xml> + + <xml name="txt_output_test" token_assembler=""> + <output name="short_report_txt"> + <assert_contents> + <has_text text="SHORT SUMMARY REPORT"/> + </assert_contents> + </output> + </xml> + <xml name="tex_output_test" token_assembler=""> + <output name="short_report_tex"> + <assert_contents> + <has_text text="Short summary report"/> + <has_text text="end{document}"/> + </assert_contents> + </output> + </xml> + <xml name="tsv_output_test" token_assembler=""> + <output name="short_report_tsv"> + <assert_contents> + <has_line_matching expression="^METRICS/TRANSCRIPTS\t.+$"/> + </assert_contents> + </output> + </xml> + <xml name="pdf_output_test" token_assembler=""> + <output name="short_report_pdf"> + <assert_contents> + <has_text text="rnaQUAST short report"/> + </assert_contents> + </output> + </xml> </macros> <requirements> <requirement type="package" version="@TOOL_VERSION@">rnaquast</requirement> @@ -21,7 +77,7 @@ </stdio> <command detect_errors="exit_code"><![CDATA[ #import re - #for $i in $input + #for $i in $in_fasta ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' && #end for #if $r @@ -38,7 +94,7 @@ rnaQUAST.py --threads \${GALAXY_SLOTS:-1} --transcripts - #for $i in $input + #for $i in $in_fasta '${re.sub('[^\w\-.]', '_', i.element_identifier)}' #end for $strand_specific @@ -63,18 +119,22 @@ #end if $blat $busco_lineage - $gene_mark + ##GeneMarkS-T is not available in conda $gene_mark + $meta --lower_threshold $lower_threshold --upper_threshold $upper_threshold -o outputdir - && mkdir details - #for $i in $input + && mkdir details + ## move per outputs that are generated for each input (outputdir/ASSEMBLER_output) + ## to a joint dir (details) to make them discoverable + ## also remove "ASSEMBLER." prefixes from files (otherwise the test macros don't work) + #for $i in $in_fasta #set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0] && (for f in \$(find 'outputdir/'$basename'_output' -type f); do d=\$(dirname \$f | cut -d"/" -f2 | cut -d'_' -f1) && - mv \$f details/"\$d"_____"\$(basename \$f)"; + mv \$f details/"\$d"_____"\$(basename \$f | sed 's/$basename\.//')"; done) #end for ## rename .list files to .txt files to make them detectable (format detection by extension) @@ -83,7 +143,7 @@ && true ]]></command> <inputs> - <param name="input" type="data" format="fasta" multiple="true" label="Chromosomes/scaffolds file"/> + <param name="in_fasta" type="data" format="fasta" multiple="true" label="Chromosomes/scaffolds file"/> <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific"/> <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" /> <conditional name="gene_coordinates"> @@ -103,7 +163,8 @@ <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used"/> <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" /> <param argument="--busco_lineage" type="boolean" truevalue="--busco_lineage" falsevalue="" checked="false" label="Run BUSCO tool?" help="The BUSCO tool detects core genes in the assembly. Use this option to provide path to the BUSCO lineage data (Eukaryota, Metazoa, Arthropoda, Vertebrata or Fungi)."/> - <param argument="--gene_mark" type="boolean" truevalue="--gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?"/> + <!-- GeneMarkS-T is not available in conda <param argument="\-\-gene_mark" type="boolean" truevalue="\-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?"/>--> + <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality asessment for Meta Transcriptome"/> <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x_assembled/covered/matched metrics."/> <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x_assembled/covered/matched metrics."/> <param name="out_sr" type="select" multiple="true" label="Short report formats"> @@ -138,13 +199,15 @@ <discover_datasets ext="txt" pattern="(?P<name>.+)\.log" directory="outputdir/logs/" visible="false" /> <filter>"logs" in out_add</filter> </collection> + <!-- note the output filter of the next two outputs checks if there is + more than 1 input for in_fasta (for 1 its a HDA, for more list or HDAs) --> <collection name="comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots" > <discover_datasets ext="png" pattern="(?P<name>.+)\.png" directory="outputdir/comparison_output/" visible="false" recurse="true"/> - <filter> len(input)>1 and "plots" in out_add</filter> + <filter> isinstance(in_fasta, list) and "plots" in out_add</filter> </collection> <collection name="comparison" type="list" label="${tool.name} on ${on_string}: comparison" > <discover_datasets ext="txt" pattern="(?P<name>.+)\.txt" directory="outputdir/comparison_output/" visible="false" recurse="true" /> - <filter> len(input)>1 and "comparison" in out_add</filter> + <filter> isinstance(in_fasta, list) and "comparison" in out_add</filter> </collection> <collection name="details" type="list:list" label="${tool.name} on ${on_string}: detailed output"> <discover_datasets pattern="(?P<identifier_0>.+)_____(?P<identifier_1>.+)\.(?P<ext>txt)" directory="details/" visible="false"/> @@ -157,7 +220,7 @@ </outputs> <tests> <test expect_num_outputs="7"> - <param name="input" value="idba.fasta,Trinity.fasta" ftype="fasta" /> + <param name="in_fasta" value="idba.fasta,Trinity.fasta" ftype="fasta" /> <param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta" /> <conditional name="gene_coordinates"> <param name="use_gtf" value="true" /> @@ -167,32 +230,19 @@ </conditional> <param name="out_sr" value="txt,tex,tsv" /> <param name="out_add" value="logs,comparison,plots,details" /> - <output name="short_report_txt"> - <assert_contents> - <has_text text="SHORT SUMMARY REPORT"/> - </assert_contents> - </output> - <output name="short_report_tex"> - <assert_contents> - <has_text text="Short summary report"/> - <has_text text="end{document}"/> - </assert_contents> - </output> - <output name="short_report_tsv"> - <assert_contents> - <has_line_matching expression="^METRICS/TRANSCRIPTS\tidba\tTrinity$"/> - </assert_contents> - </output> + <expand macro="txt_output_test"/> + <expand macro="tex_output_test"/> + <expand macro="tsv_output_test"/> <output_collection name="comparison_png" type="list" count="15"/> <output_collection name="comparison" type="list" count="19"/> <output_collection name="list_logs" type="list" count="8"/> <output_collection name="details" type="list:list" count="2"> - <output_collection name="Trinity" type="list" count="21"/> - <output_collection name="idba" type="list" count="21"/> + <expand macro="details_output_test" assembler="Trinity"/> + <expand macro="details_output_test" assembler="idba"/> </output_collection> </test> - <test expect_num_outputs="8"> - <param name="input" value="Trinity.fasta" ftype="fasta" /> + <test expect_num_outputs="6"> + <param name="in_fasta" value="Trinity.fasta" ftype="fasta" /> <conditional name="gene_coordinates"> <param name="use_gtf" value="false" /> </conditional> @@ -201,16 +251,20 @@ <param name="upper_threshold" value="95"/> <param name="out_sr" value="txt,tex,tsv,pdf" /> <param name="out_add" value="logs,details_plots" /> - <output name="short_report_pdf" file="short_report.pdf" compare="sim_size"/> - <output name="short_report_txt" file="short_report.txt" compare="sim_size"/> - <output name="short_report_tex" file="short_report.tex" compare="sim_size"/> - <output name="short_report_tsv" file="short_report.tsv" compare="sim_size"/> + + <expand macro="pdf_output_test"/> + <expand macro="tex_output_test"/> + <expand macro="tsv_output_test"/> + <expand macro="txt_output_test"/> <output_collection name="list_logs" type="list"> - <element name="rnaQUAST" file="rnaQUAST"/> - <element name="Trinity.GeneMarkS_T.err" file="spades.311.GeneMarkS_T.err"/> + <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text=""/> + <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!"/> </output_collection> <output_collection name="details_png" type="list:list" count="1"> - <output_collection name="Trinity" type="list" count="11"/> + <element name="Trinity"> + <expand macro="element_has_text" name="Nx" text="PNG"/> + <expand macro="element_has_text" name="transcript_length" text="PNG"/> + </element> </output_collection> </test> </tests> @@ -219,29 +273,25 @@ - a quality assessment tool for de novo transcriptome assemblies - evaluating RNA-Seq assembly quality and benchmarking transcriptome assemblers using reference genome and gene database - calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts - -**Using rnaQuast without reference** you wont get: - -- x-assembled (Exons) -- Alignments per Isoform +**Using rnaQuast without reference** you wont get: +- x-assembled (Exons) +- Alignments per Isoform - x-covered (Exons) - x-matched (Blocks) - gmap build logs - **Using rnaQuast with reference** you will get: - Reports - Logs - Alignement/Basic Metrics - Misassemblies/ Specificity/ Sensitivity - Alignment multiplicity -- Block/ Transcript Lentgh +- Block/ Transcript Lentgh - Blocks per alignment - Mismatch rate - x-aligned -- Nx +- Nx - Blocks per alignment - gmap build logs - **Using rnaQuast without gene coordinates** you wont get: - x-assembled (Exons) - Alignments per Isoform @@ -252,7 +302,7 @@ - Alignment multiplicity - Mismatch rate - NAx -- x-aligned +- x-aligned **Using rnaQuast with gene coordinates** you will get: - Reports - Logs