comparison rna_quast.xml @ 5:f89e3c318453 draft

planemo upload for repository https://git.ufz.de/lehmanju/rnaquast commit c633f5c634128e3c81ab48e94df6f703dd005c46
author iuc
date Wed, 07 Jun 2023 12:02:03 +0000
parents f9f2ad782d8f
children 8e66f695d859
comparison
equal deleted inserted replaced
4:f9f2ad782d8f 5:f89e3c318453
1 <tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> 1 <tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>A quality assessment tool for De Novo transcriptome assemblies</description> 2 <description>A quality assessment tool for De Novo transcriptome assemblies</description>
3 <xrefs>
4 <xref type="bio.tools">rnaQUAST</xref>
5 </xrefs>
6 <macros> 3 <macros>
7 <token name="@TOOL_VERSION@">2.2.1</token> 4 <import>macros.xml</import>
8 <token name="@VERSION_SUFFIX@">1</token>
9 <xml name="element_matching_line" token_name="" token_expression="">
10 <element name="@NAME@">
11 <assert_contents>
12 <has_line_matching expression="@EXPRESSION@" />
13 </assert_contents>
14 </element>
15 </xml>
16 <xml name="element_has_text" token_name="" token_text="">
17 <element name="@NAME@">
18 <assert_contents>
19 <has_text text="@TEXT@" />
20 </assert_contents>
21 </element>
22 </xml>
23 <xml name="details_output_test" token_assembler="">
24 <element name="@ASSEMBLER@">
25 <element name="5000%-assembled.list">
26 <assert_contents>
27 <has_n_lines n="0" />
28 </assert_contents>
29 </element>
30 <element name="9500%-assembled.list">
31 <assert_contents>
32 <has_n_lines n="0" />
33 </assert_contents>
34 </element>
35 <expand macro="element_matching_line" name="alignment_metrics" expression="\s*== ALIGNMENT METRICS \(calculated with reference genome but without gene database\) ==\s*" />
36 <expand macro="element_matching_line" name="alignment_multiplicity" expression="unaligned=\d+ aligned=\d+ alignments=\d+\s*" />
37 <expand macro="element_matching_line" name="alignments_per_isoform" expression="avg=[\d.]+\s*" />
38 <expand macro="element_matching_line" name="basic_metrics" expression="\s*== BASIC TRANSCRIPTS METRICS \(calculated without reference genome and gene database\) ==\s*" />
39 <expand macro="element_matching_line" name="block_length" expression="avg=[\d.]+\s*" />
40 <expand macro="element_matching_line" name="blocks_per_alignment" expression="avg=[\d.]+\s+tot=\d+\s*" />
41 <expand macro="element_matching_line" name="database_metrics" expression="\s*== GENE DATABASE METRICS ==\s*" />
42 <expand macro="element_matching_line" name="misassemblies" expression="\s*== ALIGNMENT METRICS FOR MISASSEMBLED \(CHIMERIC\) TRANSCRIPTS \(calculated with reference genome or with gene database\) ==\s*" />
43 <expand macro="element_matching_line" name="mismatch_rate" expression="avg=[\d.]+\s+tot=\d+\s*" />
44 <expand macro="element_matching_line" name="sensitivity" expression="\s*== ASSEMBLY COMPLETENESS \(SENSITIVITY\) ==\s*" />
45 <expand macro="element_matching_line" name="specificity" expression="\s*== ASSEMBLY SPECIFICITY ==\s*" />
46 <expand macro="element_matching_line" name="transcript_length" expression="avg=[\d.]+\s*" />
47 <expand macro="element_matching_line" name="x-aligned" expression="avg=[\d.]+\s*" />
48 <expand macro="element_matching_line" name="x-assembled" expression="avg=[\d.]+\s*" />
49 <expand macro="element_matching_line" name="x-assembled_exons" expression="avg=[\d.]+\s*" />
50 <expand macro="element_matching_line" name="x-covered" expression="avg=[\d.]+\s*" />
51 <expand macro="element_matching_line" name="x-covered_exons" expression="avg=[\d.]+\s*" />
52 <expand macro="element_matching_line" name="x-matched" expression="avg=[\d.]+\s*" />
53 <expand macro="element_matching_line" name="x-matched_blocks" expression="avg=[\d.]+\s*" />
54 </element>
55 </xml>
56
57 <xml name="txt_output_test" token_assembler="">
58 <output name="short_report_txt">
59 <assert_contents>
60 <has_text text="SHORT SUMMARY REPORT" />
61 </assert_contents>
62 </output>
63 </xml>
64 <xml name="tex_output_test" token_assembler="">
65 <output name="short_report_tex">
66 <assert_contents>
67 <has_text text="Short summary report" />
68 <has_text text="end{document}" />
69 </assert_contents>
70 </output>
71 </xml>
72 <xml name="tsv_output_test" token_assembler="">
73 <output name="short_report_tsv">
74 <assert_contents>
75 <has_line_matching expression="^METRICS/TRANSCRIPTS\t.+$" />
76 </assert_contents>
77 </output>
78 </xml>
79 <xml name="pdf_output_test" token_assembler="">
80 <output name="short_report_pdf">
81 <assert_contents>
82 <has_text text="rnaQUAST short report" />
83 </assert_contents>
84 </output>
85 </xml>
86 </macros> 5 </macros>
87 <requirements> 6 <expand macro='xrefs'/>
88 <requirement type="package" version="@TOOL_VERSION@">rnaquast</requirement> 7 <expand macro='requirements'/>
89 </requirements>
90 <stdio> 8 <stdio>
91 <regex match="Traceback " source="both" level="fatal" description="rnaQuast failed" /> 9 <regex match="Traceback " source="both" level="fatal" description="rnaQuast failed" />
92 </stdio> 10 </stdio>
93 <command detect_errors="exit_code"><![CDATA[ 11 <command detect_errors="exit_code"><![CDATA[
94 #import re 12 mkdir -p './complete_reports/' &&
95 #for $i in $transcripts 13 mkdir -p './fasta_files/' &&
14 #import os, re, glob
15 #for $i in $transcripts
96 ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' && 16 ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' &&
97 #end for 17 #end for
98 #if $r 18 #if $reference
99 #for $rf in $r 19 #for $rf in $reference
100 ln -s '$rf' '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' && 20 ln -s '$rf' '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' &&
101 #end for 21 #end for
102 #end if 22 #end if
103 #if $gene_coordinates.use_gtf == "true" 23 #if $gene_coordinates.selector == "true"
104 #for $g in $gene_coordinates.gtf 24 #for $g in $gene_coordinates.gtf
105 ln -s '$g' '${re.sub('[^\w\-.]', '_', g.element_identifier)}' && 25 ln -s '$g' '${re.sub('[^\w\-.]', '_', g.element_identifier)}' &&
106 #end for 26 #end for
107 #end if 27 #end if
108 mkdir outputdir && 28 mkdir outputdir &&
109 rnaQUAST.py 29 rnaQUAST.py
110 --threads \${GALAXY_SLOTS:-1} 30 --threads \${GALAXY_SLOTS:-8}
111 --transcripts 31 --transcripts
112 #for $i in $transcripts 32 #for $i in $transcripts
113 '${re.sub('[^\w\-.]', '_', i.element_identifier)}' 33 '${re.sub('[^\w\-.]', '_', i.element_identifier)}'
114 #end for 34 #end for
115 $strand_specific 35 #if $reads_option.selector == 'paired'
116 #if $r 36 --left_reads '${reads_option.forward_reads}'
37 --right_reads '${reads_option.reverse_reads}'
38 #else if $reads_option.selector == 'single'
39 --single_reads '${reads_option.single_reads}'
40 #end if
41 $advanced_options.strand_specific
42 #if $reads_alignment
43 --reads_alignment '${reads_alignment}'
44 #end if
45 #if $reference
117 -r 46 -r
118 #for $rf in $r 47 #for $rf in $reference
119 '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' 48 '${re.sub('[^\w\-.]', '_', rf.element_identifier)}'
120 #end for 49 #end for
121 #end if 50 #end if
122 #if $gene_coordinates.use_gtf == "true" 51 #if $gene_coordinates.selector == "true"
123 --gtf 52 --gtf
124 #for $g in $gene_coordinates.gtf 53 #for $g in $gene_coordinates.gtf
125 '${re.sub('[^\w\-.]', '_', g.element_identifier)}' 54 '${re.sub('[^\w\-.]', '_', g.element_identifier)}'
126 #end for 55 #end for
127 $gene_coordinates.disable_infer_genes 56 $gene_coordinates.disable_infer_genes
128 $gene_coordinates.disable_infer_transcripts 57 $gene_coordinates.disable_infer_transcripts
129 #end if 58 #end if
130 $prokaryote 59 $advanced_options.prokaryote
131 --min_alignment '$min_alignment' 60 --min_alignment $advanced_options.min_alignment
132 #if "pdf" not in $out_sr and "plots" not in $out_add 61 $advanced_options.blat
62
63 #if "pdf" not in $output_options.out_sr
133 --no_plots 64 --no_plots
134 #end if 65 #end if
135 $blat 66 #if $use_busco.selector == 'true'
136 #if $busco_option.busco == 'true' 67 --busco
137 --busco $busco_option.lineage 68 #if $use_busco.lineage_conditional.selector == 'cached':
138 #end if 69 '${use_busco.lineage_conditional.cached_db.fields.path}'
139 ##$gene_mark 70 #else
140 $meta 71 $use_busco.lineage
141 --lower_threshold $lower_threshold 72 #end if
142 --upper_threshold $upper_threshold 73 #end if
74 ## $advanced_options.gene_mark
75 $advanced_options.meta
76 --lower_threshold $advanced_options.lower_threshold
77 --upper_threshold $advanced_options.upper_threshold
143 -o outputdir 78 -o outputdir
144 79
145 && mkdir details 80 #if 'gz' in $output_options.out_add
146 81 && tar -czvf results.tar.gz './outputdir'
147 ## move per outputs that are generated for each input (outputdir/ASSEMBLER_output) 82 #end if
148 ## to a joint dir (details) to make them discoverable 83
149 ## also remove "ASSEMBLER." prefixes from files (otherwise the test macros don't work) 84 #if len($transcripts) == 1
150 #for $i in $transcripts 85 #set $path = "/".join(['outputdir',($transcripts[0].element_identifier).split(".")[0]]) + "_output"
151 #set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0] 86 && mv '${path}' './results'
152 && 87 ## rename .list files to .txt files to make them detectable
153 (for f in \$(find 'outputdir/'$basename'_output' -type f); 88 && find './results/' -name "*.list" -exec mv {} {}.txt \;
154 do 89 && true
155 d=\$(dirname \$f | cut -d"/" -f2 | cut -d'_' -f1) && 90 && printf "************ METRICS/TRANSCRIPTS ***************\n" > stats.txt
156 mv \$f details/"\$d"_____"\$(basename \$f | sed 's/$basename\.//')"; 91 && for file_name in ./results/*txt; do printf "\n************ \$file_name ************\n" >> stats.txt
157 done) 92 && sed 's/^ ==.*/&\n/' \$file_name | tail -q -n +2 "\$file_name" >> stats.txt;
158 #end for 93 done
159 94 && cat stats.txt > $stats
160 ## rename .list files to .txt files to make them detectable (format detection by extension) 95 #if $gene_coordinates.selector == 'true' and $reference
161 ## the final `true` seems needed since otherwise the `;` at the end is swallowed 96 && mv ./results/*fasta ./fasta_files/
162 && find details/ -name "*.list" -exec mv {} {}.txt \; 97 #end if
163 && true 98 #else
99 && mkdir -p './results/'
100 #if $gene_coordinates.selector == 'true' and $reference
101 #for $i, $transcript in enumerate($transcripts)
102 #set $path = "/".join(['outputdir',($transcripts[$i].element_identifier).split(".")[0]]) + "_output"
103 && rm -r ./results
104 && cp -r $path './results'
105 && mv ./results/*fasta './fasta_files/'
106 #end for
107 #end if
108 && find './outputdir/comparison_output' -name "*.list" -exec mv {} {}.txt \;
109 && true
110 && printf "************ COMPARISON METRICS ***************\n" > stats.txt
111 && for file_name in ./outputdir/comparison_output/*txt; do printf "\n************ \$file_name ************\n" >> stats.txt
112 && sed 's/^ ==.*/&\n/' \$file_name | tail -q -n +2 "\$file_name" >> stats.txt; done
113 && cat stats.txt > $stats
114 #end if
164 ]]> </command> 115 ]]> </command>
165 <inputs> 116 <inputs>
166 <param argument="--transcripts" type="data" format="fasta" multiple="true" label="Transcripts" help="File(s) with transcripts in FASTA format."/> 117 <param argument="--transcripts" type="data" format="fasta" multiple="true" label="Transcripts" help="File(s) with transcripts in FASTA format."/>
167 <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific" 118 <conditional name="reads_option">
168 help="Set if transcripts were assembled using strand-specific RNA-Seq data in order to benefit from knowing whether the transcript originated from the + or - strand"/> 119 <param name="selector" type="select" label="Single-end or paired-end reads">
169 <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" help="File with reference genome containing all chromosomes/scaffolds in FASTA forma." /> 120 <option value="" selected="true">Disabled-end</option>
121 <option value="single" selected="true">Single-end</option>
122 <option value="paired">Paired-end (as individual datasets)</option>
123 </param>
124 <when value=""/>
125 <when value="single">
126 <param format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" name="single_reads" type="data" label="RNA-Seq FASTQ/FASTA file"/>
127 </when>
128 <when value="paired">
129 <param name="forward_reads" format="fastq,fastq.gz,fastqsanger ,fastqsanger.gz" type="data" label="RNA-Seq FASTQ/FASTA file, forward reads"/>
130 <param name="reverse_reads" format="fastq,fastq.gz,fastqsanger, fastqsanger.gz" type="data" label="RNA-Seq FASTQ/FASTA file, reverse reads"/>
131 </when>
132 </conditional>
133 <param argument="--reference" type="data" format="fasta" label="Reference genome" multiple="true" optional="true" help="File with reference genome containing all chromosomes/scaffolds in FASTA forma." />
170 <conditional name="gene_coordinates"> 134 <conditional name="gene_coordinates">
171 <param name="use_gtf" type="select" label="Use file with gene coordinates in GTF/GFF format?" help="We recommend to use files downloaded from GENCODE or Ensembl."> 135 <param name="selector" type="select" label="Genome annotation" help="Genome annotation file. We recommend to use files downloaded from GENCODE or Ensembl.">
172 <option value="true" selected="true">Yes</option> 136 <option value="true">Enabled</option>
173 <option value="false">No</option> 137 <option value="false" selected="true">Disabled</option>
174 </param> 138 </param>
175 <when value="true"> 139 <when value="true">
176 <param name="gtf" argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file" /> 140 <param argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file" />
177 <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label=" GTF file contains genes records?" 141 <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label=" Disable infer genes"
178 help="Use this option if your GTF file already contains genes records, otherwise gffutils will fix it. Note that gffutils may work for quite a long time"/> 142 help="Use this option if your GTF file already contains genes records, otherwise gffutils will fix it. Note that gffutils may work for quite a long time"/>
179 <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="GTF file contains transcripts records?" help="Is option if your GTF file already contains transcripts records, otherwise gffutils will fix it."/> 143 <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="Disable infer transcripts" help="Is option if your GTF file already contains transcripts records, otherwise gffutils will fix it."/>
180 </when> 144 </when>
181 <when value="false"> 145 <when value="false">
182 </when> 146 </when>
183 </conditional> 147 </conditional>
184 <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Is genome prokararyotic?" help="Use this option if the genome is prokaryotic."/> 148 <param argument="--reads_alignment" type="data" format="sam" label="Aligned reads to reference genome" optional="true" help="File with read alignments to the reference genome" />
185 <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used" help="Default value is 50"/> 149 <conditional name="use_busco">
186 <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" help="Blat is especially useful for aligning long sequences and gapped mapping, which cannot be performed properly by other fast sequence mappers designed for short reads. " /> 150 <param argument="selector" type="select" label="Run BUSCO" help="BUSCO allows to detect core genes in the assembled transcripts">
187 <conditional name="busco_option">
188 <param argument="--busco" type="select" label="Run BUSCO tool?" help="BUSCO allows to detect core genes in the assembled transcripts">
189 <option value="false">Disabled</option> 151 <option value="false">Disabled</option>
190 <option value="true">Enabled</option> 152 <option value="true">Enabled</option>
191 </param> 153 </param>
192 <when value="false"/> 154 <when value="false"/>
193 <when value="true"> 155 <when value="true">
194 <param name="lineage" type="select" label="Lineage" help="Select a lineage for using BUSCO"> 156 <conditional name="lineage_conditional">
195 <option value="metazoa">Metazoa</option> 157 <param name="selector" type="select" label="Lineage data source">
196 <option value="eukaryota">Eukaryota</option> 158 <option value="download">Download lineage data</option>
197 <option value="arthropoda">Arthropoda</option> 159 <option value="cached" selected="true">Use cached lineage data</option>
198 <option value="vertebrata">Vertebrata</option> 160 </param>
199 <option value="fungi">Fungi</option> 161 <when value="cached">
200 <option value="bacteria">Bacteria</option> 162 <param name="cached_db" label="Cached database with lineage" type="select">
201 </param> 163 <options from_data_table="busco_database">
164 <validator message="No BUSCO database is available" type="no_options" />
165 </options>
166 </param>
167 </when>
168 <when value="download">
169 <param name="lineage" type="select" label="Lineage" help="Select a lineage for using BUSCO">
170 <option value="metazoa">Metazoa</option>
171 <option value="eukaryota">Eukaryota</option>
172 <option value="arthropoda">Arthropoda</option>
173 <option value="vertebrata">Vertebrata</option>
174 <option value="fungi">Fungi</option>
175 <option value="bacteria">Bacteria</option>
176 </param>
177 </when>
178 </conditional>
202 </when> 179 </when>
203 </conditional> 180 </conditional>
204 <!--param argument="-\-gene_mark" type="boolean" truevalue="-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?" help="GeneMarkS-T allows to predict genes in the assembled transcripts without reference genome"/--> 181 <section name="advanced_options" title="Advaced options" >
205 <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality asessment for meta-transcriptome assemblies" /> 182 <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific RNA-seq data"
206 <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x-assembled/covered/matched metrics." /> 183 help="Set if transcripts were assembled using strand-specific RNA-Seq data in order to benefit from knowing whether the transcript originated from the + or - strand"/>
207 <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x-assembled/covered/matched metrics." /> 184 <param argument="--min_alignment" type="integer" min="0" value="50" label="Minimal alignment length to be used" help="Default value is 50"/>
208 <param name="out_sr" type="select" multiple="true" label="Short report formats"> 185 <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT instead of GMAP" help="BALT is especially useful for aligning long sequences and gapped mapping, which cannot be performed properly by other fast sequence mappers designed for short reads. " />
209 <option value="tsv" selected="true">tabular</option> 186 <!-- GeneMarkST is not in Bioconda -->
210 <option value="txt">txt</option> 187 <!--param argument="-\-gene_mark" type="boolean" truevalue="-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?"
211 <option value="tex">tex</option> 188 help="GeneMarkS-T allows to predict genes in the assembled transcripts without reference genome"/-->
212 <option value="pdf" selected="true">pdf</option> 189 <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality asessment for meta-transcriptome assemblies" />
213 </param> 190 <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x-assembled/covered/matched metrics." />
214 <param name="out_add" type="select" multiple="true" label="Additional outputs"> 191 <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x-assembled/covered/matched metrics." />
215 <option value="logs">Logs</option> 192 <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Prokararyotic organism(s)" help="Use this option if the genome is prokaryotic"/>
216 <option value="plots" selected="true">Plots (only for n>1)</option> 193 </section>
217 <option value="comparison" selected="true">Comparison for Chromosomes/scaffolds files (only for n>1)</option> 194 <section name="output_options" title="Output options" expanded="true">
218 <option value="details" selected="true">Details per Chromosomes/scaffolds file</option> 195 <param name="out_sr" type="select" multiple="true" display="checkboxes" label="Short report formats">
219 <option value="details_plots" selected="true">Details per Chromosomes/scaffolds file as plot</option> 196 <option value="tabular">Tabular</option>
220 </param> 197 <option value="tex">TeX</option>
198 <option value="pdf" selected="true">PDF</option>
199 </param>
200 <param name="out_add" type="select" label="Additional outputs" multiple="true" display="checkboxes">
201 <option value="complete">Complete report</option>
202 <option value="fasta" >FASTA files</option>
203 <option value="logs">Logs</option>
204 <option value="gz">Compressed output folder</option>
205 </param>
206 </section>
221 </inputs> 207 </inputs>
222
223 <outputs> 208 <outputs>
224 <data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: pdf report" from_work_dir="outputdir/short_report.pdf"> 209 <data name="stats" format="txt" label="${tool.name} on ${on_string}: complete report">
225 <filter>"pdf" in out_sr</filter> 210 <filter>output_options['out_add'] and "complete" in output_options['out_add']</filter>
226 </data>
227 <data name="short_report_txt" format="txt" label="${tool.name} on ${on_string}: txt report" from_work_dir="outputdir/short_report.txt">
228 <filter>"txt" in out_sr</filter>
229 </data>
230 <data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: tex report" from_work_dir="outputdir/short_report.tex">
231 <filter>"tex" in out_sr</filter>
232 </data>
233 <data name="short_report_tsv" format="tabular" label="${tool.name} on ${on_string}: tsv report" from_work_dir="outputdir/short_report.tsv">
234 <filter>"tsv" in out_sr</filter>
235 </data> 211 </data>
236 <collection name="list_logs" type="list" label="${tool.name} on ${on_string}: logs"> 212 <collection name="list_logs" type="list" label="${tool.name} on ${on_string}: logs">
237 <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.log" directory="outputdir/logs/" visible="false" /> 213 <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.log" directory="outputdir/logs" visible="false" />
238 <filter>"logs" in out_add</filter> 214 <filter>output_options['out_add'] and "logs" in output_options['out_add']</filter>
239 </collection> 215 </collection>
240 <!-- note the output filter of the next two outputs checks if there is 216 <collection name="fasta_files" type="list" label="${tool.name} on ${on_string}: FASTA files">
241 more than 1 input for transcripts (for 1 its a HDA, for more list or HDAs) --> 217 <discover_datasets ext="fasta" pattern="(?P&lt;name&gt;.+)\.fasta" directory="fasta_files" visible="false" />
242 <collection name="comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots"> 218 <filter>output_options['out_add'] and "fasta" in output_options['out_add']</filter>
243 <discover_datasets ext="png" pattern="(?P&lt;name&gt;.+)\.png" directory="outputdir/comparison_output/" visible="false" recurse="true" /> 219 <filter>gene_coordinates['selector'] == 'true'</filter>
244 <filter> isinstance(transcripts, list) and "plots" in out_add</filter> 220 <filter>reference</filter>
245 </collection> 221 </collection>
246 <collection name="comparison" type="list" label="${tool.name} on ${on_string}: comparison"> 222 <data name="compressed_files" format="tgz" label="${tool.name} on ${on_string}: compressed results folder" from_work_dir="results.tar.gz">
247 <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.txt" directory="outputdir/comparison_output/" visible="false" recurse="true" /> 223 <filter>output_options['out_add'] and "gz" in output_options['out_add']</filter>
248 <filter> isinstance(transcripts, list) and "comparison" in out_add</filter> 224 </data>
249 </collection> 225 <data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: short report (pdf)" from_work_dir="outputdir/short_report.pdf">
250 <collection name="details" type="list:list" label="${tool.name} on ${on_string}: detailed output"> 226 <filter>output_options['out_sr'] and "pdf" in output_options['out_sr']</filter>
251 <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;txt)" directory="details/" visible="false" /> 227 </data>
252 <filter>"details" in out_add</filter> 228 <data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: short report (tex)" from_work_dir="outputdir/short_report.tex">
253 </collection> 229 <filter>output_options['out_sr'] and "tex" in output_options['out_sr']</filter>
254 <collection name="details_png" type="list:list" label="${tool.name} on ${on_string}: detailed output plots"> 230 </data>
255 <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;png)" directory="details/" visible="false" /> 231 <data name="short_report_tabular" format="tabular" label="${tool.name} on ${on_string}: short report (tabular)" from_work_dir="outputdir/short_report.tsv">
256 <filter>"details_plots" in out_add</filter> 232 <filter>output_options['out_sr'] and "tabular" in output_options['out_sr']</filter>
257 </collection> 233 </data>
258 </outputs> 234 </outputs>
259 <tests> 235 <tests>
260 <test expect_num_outputs="7"> 236 <!-- Test 01: Minimum input txt output-->
261 <param name="transcripts" value="idba.fasta,Trinity.fasta" ftype="fasta" /> 237 <test expect_num_outputs="1">
262 <param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta" /> 238 <param name="transcripts" value="transcriptome01.fasta"/>
263 <conditional name="gene_coordinates"> 239 <section name="output_options">
264 <param name="use_gtf" value="true" /> 240 <param name="out_sr" value="tabular"/>
265 <param name="gtf" value="Saccharomyces_cerevisiae.R64-1-1.75.gtf" ftype="gtf" /> 241 </section>
266 <param name="disable_infer_genes" value="true" /> 242 <output name="short_report_tabular" file="test_01_short_report.tab"/>
267 <param name="disable_infer_transcripts" value="true" /> 243 </test>
268 </conditional> 244 <!-- Test 02: Transcriptome reference,single read, txt output-->
269 <param name="out_sr" value="txt,tex,tsv" /> 245 <test expect_num_outputs="1">
270 <param name="out_add" value="logs,comparison,plots,details" /> 246 <param name="transcripts" value="transcriptome01.fasta"/>
271 <expand macro="txt_output_test" /> 247 <section name="output_options">
272 <expand macro="tex_output_test" /> 248 <param name="out_sr" value="tabular"/>
273 <expand macro="tsv_output_test" /> 249 </section>
274 <output_collection name="comparison_png" type="list" count="15" /> 250 <conditional name="reads_option">
275 <output_collection name="comparison" type="list" count="19" /> 251 <param name="selector" value="single"/>
276 <output_collection name="list_logs" type="list" count="8" /> 252 <param name="single_reads" value="single_end.fastq.gz"/>
277 <output_collection name="details" type="list:list" count="2"> 253 </conditional>
278 <expand macro="details_output_test" assembler="Trinity" /> 254 <output name="short_report_tabular">
279 <expand macro="details_output_test" assembler="idba" /> 255 <assert_contents>
256 <has_text text="Transcripts" />
257 <has_size value="95" delta="5"/>
258 </assert_contents>
259 </output>
260 </test>
261 <!-- Test 03: Transcriptome reference and annotation, txt output-->
262 <test expect_num_outputs="1">
263 <param name="transcripts" value="transcriptome01.fasta"/>
264 <conditional name="gene_coordinates">
265 <param name="selector" value="true"/>
266 <param name="gtf" value="reference.gtf"/>
267 </conditional>
268 <section name="output_options">
269 <param name="out_sr" value="tabular"/>
270 </section>
271 <conditional name="reads_option">
272 <param name="selector" value="single"/>
273 <param name="single_reads" value=""/>
274 </conditional>
275 <output name="short_report_tabular" file="test_03_short_report.tab"/>
276 </test>
277 <!-- Test 04: Transcriptome reference and annotation, txt output-->
278 <test expect_num_outputs="1">
279 <param name="transcripts" value="transcriptome01.fasta"/>
280 <conditional name="gene_coordinates">
281 <param name="selector" value="true"/>
282 <param name="gtf" value="reference.gtf"/>
283 </conditional>
284 <section name="output_options">
285 <param name="out_sr" value="tabular"/>
286 </section>
287 <conditional name="reads_option">
288 <param name="selector" value="single"/>
289 <param name="single_reads" value="single_end.fastq.gz"/>
290 </conditional>
291 <output name="short_report_tabular">
292 <assert_contents>
293 <has_text text="Transcripts" />
294 <has_size value="140" delta="5"/>
295 </assert_contents>
296 </output>
297 </test>
298 <!-- Test 05: Transcriptome reference, annotation and mapping, txt output-->
299 <test expect_num_outputs="1">
300 <param name="transcripts" value="transcriptome01.fasta"/>
301 <conditional name="gene_coordinates">
302 <param name="selector" value="true"/>
303 <param name="gtf" value="reference.gtf"/>
304 </conditional>
305 <section name="output_options">
306 <param name="out_sr" value="tabular"/>
307 </section>
308 <conditional name="reads_option">
309 <param name="selector" value='paired'/>
310 <param name="forward_reads" value="input_F.fastqsanger"/>
311 <param name="reverse_reads" value="input_F.fastqsanger"/>
312 </conditional>
313 <output name="short_report_tabular">
314 <assert_contents>
315 <has_text text="Transcripts" />
316 <has_size value="140" delta="5"/>
317 </assert_contents>
318 </output>
319 </test>
320 <!-- Test 06: Transcriptome reference, annotation, mapping and BUSCO, txt output-->
321 <test expect_num_outputs="1">
322 <param name="transcripts" value="transcriptome01.fasta"/>
323 <conditional name="gene_coordinates">
324 <param name="selector" value="true"/>
325 <param name="gtf" value="reference.gtf"/>
326 </conditional>
327 <conditional name="reads_option">
328 <param name="selector" value='paired'/>
329 <param name="forward_reads" value="input_F.fastqsanger"/>
330 <param name="reverse_reads" value="input_R.fastqsanger"/>
331 </conditional>
332 <section name="output_options">
333 <param name="out_sr" value="tabular"/>
334 </section>
335 <conditional name="use_busco">
336 <param name="selector" value="true"/>
337 <conditional name="lineage_conditional">
338 <param name="selector" value="cached"/>
339 <param name="cached_db" value="busco-demo-db-20230328"/>
340 </conditional>
341 </conditional>
342 <output name="short_report_tabular">
343 <assert_contents>
344 <has_text text="Transcripts" />
345 <has_size value="140" delta="5"/>
346 </assert_contents>
347 </output>
348
349 </test>
350 <!-- Test 07: Transcriptome reference, annotation, mapping and BUSCO, additional outputs-->
351 <test expect_num_outputs="4">
352 <param name="transcripts" value="transcriptome01.fasta"/>
353 <conditional name="gene_coordinates">
354 <param name="selector" value="true"/>
355 <param name="gtf" value="reference.gtf"/>
356 </conditional>
357 <param name="reference" value="reference.fasta"/>
358 <conditional name="reads_option">
359 <param name="selector" value='paired'/>
360 <param name="forward_reads" value="input_F.fastqsanger"/>
361 <param name="reverse_reads" value="input_R.fastqsanger"/>
362 </conditional>
363 <conditional name="use_busco">
364 <param name="selector" value="true"/>
365 <conditional name="lineage_conditional">
366 <param name="selector" value="cached"/>
367 <param name="cached_db" value="busco-demo-db-20230328"/>
368 </conditional>
369 </conditional>
370 <section name="output_options">
371 <param name="out_sr" value="pdf,tabular"/>
372 <param name="out_add" value="fasta,gz"/>
373 </section>
374 <output_collection name="fasta_files" type="list" count="7">
375 <element name="transcriptome01.paralogs" file="test_07_paralogs.fasta" ftype="fasta"/>
280 </output_collection> 376 </output_collection>
281 </test> 377 <output name="short_report_pdf" file="test_07_short_report.pdf" ftype="pdf" compare="sim_size" delta="1000"/>
378 <output name="short_report_tabular" file="test_07_short_report.tab" ftype="tabular"/>
379 <output name="compressed_files" ftype="tgz">
380 <assert_contents>
381 <has_size value="281260" delta="250"/>
382 </assert_contents>
383 </output>
384 </test>
385 <!-- Test 08: Multiple inputs-->
282 <test expect_num_outputs="6"> 386 <test expect_num_outputs="6">
283 <param name="transcripts" value="Trinity.fasta" ftype="fasta" /> 387 <param name="transcripts" value="transcriptome01.fasta,transcriptome02.fasta"/>
284 <conditional name="gene_coordinates"> 388 <param name="reference" value="reference.fasta"/>
285 <param name="use_gtf" value="false" /> 389 <conditional name="gene_coordinates">
286 </conditional> 390 <param name="selector" value="true"/>
287 <param name="min_alignment" value="30" /> 391 <param name="gtf" value="reference.gtf"/>
288 <param name="lower_threshold" value="45" /> 392 </conditional>
289 <param name="upper_threshold" value="95" /> 393 <section name="output_options">
290 <param name="out_sr" value="txt,tex,tsv,pdf" /> 394 <param name="out_sr" value="tabular,pdf"/>
291 <param name="out_add" value="logs,details_plots" /> 395 </section>
292 396 <conditional name="use_busco">
293 <expand macro="pdf_output_test" /> 397 <param name="selector" value="true"/>
294 <expand macro="tex_output_test" /> 398 <conditional name="lineage_conditional">
295 <expand macro="tsv_output_test" /> 399 <param name="selector" value="cached"/>
296 <expand macro="txt_output_test" /> 400 <param name="cached_db" value="busco-demo-db-20230328"/>
297 <output_collection name="list_logs" type="list"> 401 </conditional>
298 <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text="" /> 402 </conditional>
299 <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!" /> 403 <param name="out_add" value="complete,fasta,logs,gz"/>
404 <conditional name="reads_option">
405 <param name="selector" value="single"/>
406 <param name="single_reads" value="single_end.fastq.gz"/>
407 </conditional>
408 <output name="short_report_tabular" value="test_08_short_report.tab" ftype="tabular"/>
409 <output name="short_report_pdf" value="test_08_short_report.pdf" ftype="pdf"/>
410 <output name="stats" value="test_08_complete_report.tab" ftype="txt" lines_diff="6" />
411 <output_collection name="fasta_files" type="list" count="14">
412 <element name="transcriptome01.paralogs" file="test_08_paralogs.fasta" ftype="fasta"/>
300 </output_collection> 413 </output_collection>
301 <output_collection name="details_png" type="list:list" count="1"> 414 <output_collection name="list_logs" type="list" count="14">
302 <element name="Trinity"> 415 <element name="STAR.out" ftype="txt">
303 <expand macro="element_has_text" name="Nx" text="PNG" /> 416 <assert_contents>
304 <expand macro="element_has_text" name="transcript_length" text="PNG" /> 417 <has_text text="STAR --runThreadN"/>
418 <has_text text="finished successfully"/>
419 </assert_contents>
420 </element>
421 <element name="gmap_build.out" ftype="txt">
422 <assert_contents>
423 <has_text text="No alternate scaffolds observed"/>
424 </assert_contents>
425 </element>
426 <element name="rnaQUAST" ftype="txt">
427 <assert_contents>
428 <has_text text="THE QUALITY OF TRANSCRIPTOME ASSEMBLY DONE"/>
429 <has_text text="Thank you for using rnaQUAST!"/>
430 </assert_contents>
305 </element> 431 </element>
306 </output_collection> 432 </output_collection>
307 </test> 433 </test>
308 <test expect_num_outputs="6"> 434
309 <param name="transcripts" value="Trinity.fasta" ftype="fasta" />
310 <conditional name="gene_coordinates">
311 <param name="use_gtf" value="false" />
312 </conditional>
313 <param name="min_alignment" value="30" />
314 <param name="lower_threshold" value="45" />
315 <param name="upper_threshold" value="95" />
316 <param name="out_sr" value="txt,tex,tsv,pdf" />
317 <param name="out_add" value="logs,details_plots" />
318 <conditional name="busco_option">
319 <param name="busco" value="true"/>
320 <param name="lineage" value="metazoa"/>
321 </conditional>
322 <expand macro="pdf_output_test" />
323 <expand macro="tex_output_test" />
324 <expand macro="tsv_output_test" />
325 <expand macro="txt_output_test" />
326 <output_collection name="list_logs" type="list">
327 <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text="" />
328 <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!" />
329 </output_collection>
330 <output_collection name="details_png" type="list:list" count="1">
331 <element name="Trinity">
332 <expand macro="element_has_text" name="Nx" text="PNG" />
333 <expand macro="element_has_text" name="transcript_length" text="PNG" />
334 </element>
335 </output_collection>
336 <assert_command>
337 <has_text text="--busco metazoa"/>
338 </assert_command>
339 </test>
340 </tests> 435 </tests>
341 <help><![CDATA[ 436 <help><![CDATA[
342 **What is rnaQUAST** 437
343 - a quality assessment tool for de novo transcriptome assemblies 438 .. class:: infomark
344 - evaluating RNA-Seq assembly quality and benchmarking transcriptome assemblers using reference genome and gene database 439
345 - calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts 440 **Purpose**
346 441
347 **Using rnaQuast without reference** you wont get: 442 rnaQUAST is a tool for evaluating RNA-Seq assemblies using a reference genome and gene database. In addition, rnaQUAST is also capable
348 443 of estimating gene database coverage by raw reads and de novo quality assessment.
349 - x-assembled (Exons) 444
350 - Alignments per Isoform 445 .. class:: infomark
351 - x-covered (Exons) 446
352 - x-matched (Blocks) 447 **rnaQUAST pipeline**
353 - gmap build logs 448
354 449 To evaluate quality of the assembled transcripts, rnaQUAST takes a reference genome in FASTA format and optionally its gene database in
355 **Using rnaQuast with reference** you will get: 450 GFF/GTF format. A user can provide a FASTA file with transcripts, which will be aligned to the given reference genome using GMAP
356 - Reports 451 or BLAT. The alignments are analyzed to calculate simple metrics and then are matched against the isoforms from the gene database in order
357 - Logs 452 to obtain statistics that represent completeness and correctness levels of the assembly. In addition, rnaQUAST is capable of estimating
358 - Alignement/Basic Metrics 453 gene database coverage by raw reads using STAR or TopHat2. For de novo quality assessment when a reference genome and gene database are
359 - Misassemblies/ Specificity/ Sensitivity 454 unavailable, the transcripts are analyzed using BUSCO.
360 - Alignment multiplicity 455
361 - Block/ Transcript Lentgh 456 .. class:: infomark
362 - Blocks per alignment 457
363 - Mismatch rate 458 **Metrics and alignment analysis**
364 - x-aligned 459
365 - Nx 460 rnaQUAST calculates various metrics without using alignment information, e.g. length distribution and N50 of the assembled transcripts.
366 - Blocks per alignment 461 Additionally, rnaQUAST computes the following statistics for the gene database: the total number of genes and isoforms, isoform and exon
367 - gmap build logs 462 length distribution, average number of exons per gene, etc.
368 463
369 **Using rnaQuast without gene coordinates** you wont get: 464 To analyze transcripts' alignments, rnaQUAST first filters out short partial alignments (shorter than a user-defined threshold, default
370 - x-assembled (Exons) 465 value is 50 bp). Such short alignments are typically caused by genomic repeats and thus are ignored. Afterwards, rnaQUAST selects the
371 - Alignments per Isoform 466 best-scored spliced alignment for each transcript. If a transcript has more than one alignment with the highest score, it is reported
372 - x-covered (Exons) 467 as multiply aligned. Otherwise, it is considered to be uniquely aligned. If the best-scored alignment is discordant (e.g. the transcript
373 - x-matched (Blocks) 468 has partial alignments that are either mapped to different strands or to different chromosomes) the transcript is classified as misassembled.
374 - gmap build logs 469 Transcripts without misassemblies are analyzed to calculate such metrics as average transcript alignment fraction and mismatch rate.
375 - Database Metrics 470
376 - Alignment multiplicity 471 For simplicity of explanation, a transcript hereafter refers to a sequence generated by the assembler, and an isoform denotes a sequence
377 - Mismatch rate 472 from the gene database. rnaQUAST matches best-scored alignments of non-misassembled transcripts to the isoforms' coordinates and analyzes
378 - NAx 473 them to estimate how well the isoforms are covered by the assembly. rnaQUAST computes such metrics as database coverage (the total number
379 - x-aligned 474 of covered bases of all isoforms divided by the total length of all isoforms) and the number of 50%/95%-assembled isoforms. An isoform is
380 **Using rnaQuast with gene coordinates** you will get: 475 considered to be x%-assembled if it has at least x% covered by a single transcript. Vice versa, to evaluate how well the assembled
381 - Reports 476 transcripts are covered by the isoforms, rnaQUAST estimates the number of unannotated transcripts (that align to the genome, but do not
382 - Logs 477 match to any isoform) and the number of 50%/95%-matched transcripts (that have the corresponding fraction mapped to an isoform). Note that the
383 - Alignement/Basic Metrics 478 thresholds described above (50% and 95%) can be varied by the user.
384 - Misassemblies/Specificity/Sensitivity 479
385 - Alignment multiplicity 480
386 - Block/Transcript length
387 - Blocks per alignment
388 - Mismatch rate
389 - x-aligned
390 - Nx/NAx
391 - gmap build logs
392 - Database Metrics
393 - Alignment multiplicity
394 More informations, see citations.
395 ]]> </help> 481 ]]> </help>
396 <citations> 482 <citations>
397 <citation type="doi">10.1093/bioinformatics/btw218 </citation> 483 <citation type="doi">10.1093/bioinformatics/btw218 </citation>
398 </citations> 484 </citations>
399 </tool> 485 </tool>