3
|
1 <tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@">
|
0
|
2 <description>A Quality Assessment Tool for De Novo Transcriptome Assemblies</description>
|
|
3 <macros>
|
|
4 <token name="@TOOL_VERSION@">2.1.0</token>
|
3
|
<!-- Test macro: checks that the collection element @NAME@ contains a line
     matching the regular expression @EXPRESSION@. -->
<xml name="element_matching_line" token_expression="" token_name="">
    <element name="@NAME@">
        <assert_contents>
            <has_line_matching expression="@EXPRESSION@"/>
        </assert_contents>
    </element>
</xml>
|
|
<!-- Test macro: checks that the collection element @NAME@ contains the
     literal text @TEXT@. -->
<xml name="element_has_text" token_text="" token_name="">
    <element name="@NAME@">
        <assert_contents>
            <has_text text="@TEXT@"/>
        </assert_contents>
    </element>
</xml>
|
|
15
|
|
<!-- Test macro: assertions for the per-assembly sub-list @ASSEMBLER@ of the
     "details" collection. The two *.list elements are expected to be empty;
     every other element must contain a line matching the given pattern. -->
<xml name="details_output_test" token_assembler="">
    <element name="@ASSEMBLER@">
        <element name="5000%-assembled.list">
            <assert_contents>
                <has_n_lines n="0"/>
            </assert_contents>
        </element>
        <element name="9500%-assembled.list">
            <assert_contents>
                <has_n_lines n="0"/>
            </assert_contents>
        </element>
        <expand macro="element_matching_line"
                name="alignment_metrics"
                expression="\s*== ALIGNMENT METRICS \(calculated with reference genome but without gene database\) ==\s*"/>
        <expand macro="element_matching_line"
                name="alignment_multiplicity"
                expression="unaligned=\d+ aligned=\d+ alignments=\d+\s*"/>
        <expand macro="element_matching_line"
                name="alignments_per_isoform"
                expression="avg=[\d.]+\s*"/>
        <expand macro="element_matching_line"
                name="basic_metrics"
                expression="\s*== BASIC TRANSCRIPTS METRICS \(calculated without reference genome and gene database\) ==\s*"/>
        <expand macro="element_matching_line"
                name="block_length"
                expression="avg=[\d.]+\s*"/>
        <expand macro="element_matching_line"
                name="blocks_per_alignment"
                expression="avg=[\d.]+\s+tot=\d+\s*"/>
        <expand macro="element_matching_line"
                name="database_metrics"
                expression="\s*== GENE DATABASE METRICS ==\s*"/>
        <expand macro="element_matching_line"
                name="misassemblies"
                expression="\s*== ALIGNMENT METRICS FOR MISASSEMBLED \(CHIMERIC\) TRANSCRIPTS \(calculated with reference genome or with gene database\) ==\s*"/>
        <expand macro="element_matching_line"
                name="mismatch_rate"
                expression="avg=[\d.]+\s+tot=\d+\s*"/>
        <expand macro="element_matching_line"
                name="sensitivity"
                expression="\s*== ASSEMBLY COMPLETENESS \(SENSITIVITY\) ==\s*"/>
        <expand macro="element_matching_line"
                name="specificity"
                expression="\s*== ASSEMBLY SPECIFICITY ==\s*"/>
        <expand macro="element_matching_line"
                name="transcript_length"
                expression="avg=[\d.]+\s*"/>
        <expand macro="element_matching_line"
                name="x-aligned"
                expression="avg=[\d.]+\s*"/>
        <expand macro="element_matching_line"
                name="x-assembled"
                expression="avg=[\d.]+\s*"/>
        <expand macro="element_matching_line"
                name="x-assembled_exons"
                expression="avg=[\d.]+\s*"/>
        <expand macro="element_matching_line"
                name="x-covered"
                expression="avg=[\d.]+\s*"/>
        <expand macro="element_matching_line"
                name="x-covered_exons"
                expression="avg=[\d.]+\s*"/>
        <expand macro="element_matching_line"
                name="x-matched"
                expression="avg=[\d.]+\s*"/>
        <expand macro="element_matching_line"
                name="x-matched_blocks"
                expression="avg=[\d.]+\s*"/>
    </element>
</xml>
|
4
|
41
|
|
<!-- Test macro: the plain-text short report must contain its title line. -->
<xml name="txt_output_test" token_assembler="">
    <output name="short_report_txt">
        <assert_contents><has_text text="SHORT SUMMARY REPORT"/></assert_contents>
    </output>
</xml>
|
|
<!-- Test macro: the TeX short report must contain its title and the closing
     document marker. -->
<xml name="tex_output_test" token_assembler="">
    <output name="short_report_tex">
        <assert_contents>
            <has_text text="Short summary report"/>
            <has_text text="end{document}"/>
        </assert_contents>
    </output>
</xml>
|
|
<!-- Test macro: the tabular short report must contain a METRICS/TRANSCRIPTS
     row followed by at least one tab-separated value. -->
<xml name="tsv_output_test" token_assembler="">
    <output name="short_report_tsv">
        <assert_contents><has_line_matching expression="^METRICS/TRANSCRIPTS\t.+$"/></assert_contents>
    </output>
</xml>
|
|
<!-- Test macro: the PDF short report must contain the report title text. -->
<xml name="pdf_output_test" token_assembler="">
    <output name="short_report_pdf">
        <assert_contents><has_text text="rnaQUAST short report"/></assert_contents>
    </output>
</xml>
|
0
|
71 </macros>
|
|
<!-- Package dependency; its version follows the @TOOL_VERSION@ token. -->
<requirements>
    <requirement version="@TOOL_VERSION@" type="package">rnaquast</requirement>
</requirements>
|
|
<!-- Treat a Python traceback on either stdout or stderr as a fatal error. -->
<stdio>
    <regex source="both"
           match="Traceback "
           level="fatal"
           description="rnaQuast failed"/>
</stdio>
|
|
<command detect_errors="exit_code"><![CDATA[
#import re
## Link every input into the working directory under a sanitized name
## (characters other than word characters, '-' and '.' become '_').
#for $i in $in_fasta
    ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' &&
#end for
#if $r
    #for $rf in $r
        ln -s '$rf' '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' &&
    #end for
#end if
#if $gene_coordinates.use_gtf == "true"
    #for $g in $gene_coordinates.gtf
        ln -s '$g' '${re.sub('[^\w\-.]', '_', g.element_identifier)}' &&
    #end for
#end if
mkdir outputdir &&
rnaQUAST.py
    --threads \${GALAXY_SLOTS:-1}
    --transcripts
    #for $i in $in_fasta
        '${re.sub('[^\w\-.]', '_', i.element_identifier)}'
    #end for
    $strand_specific
    #if $r
        -r
        #for $rf in $r
            '${re.sub('[^\w\-.]', '_', rf.element_identifier)}'
        #end for
    #end if
    #if $gene_coordinates.use_gtf == "true"
        --gtf
        #for $g in $gene_coordinates.gtf
            '${re.sub('[^\w\-.]', '_', g.element_identifier)}'
        #end for
        $gene_coordinates.disable_infer_genes
        $gene_coordinates.disable_infer_transcripts
    #end if
    $prokaryote
    --min_alignment '$min_alignment'
    ## Plots are needed for the pdf report, the comparison plots and the
    ## per-assembly plots; only skip plotting when none of them is requested.
    #if "pdf" not in $out_sr and "plots" not in $out_add and "details_plots" not in $out_add
        --no_plots
    #end if
    $blat
    $busco_lineage
    ##GeneMarkS-T is not available in conda $gene_mark
    $meta
    --lower_threshold $lower_threshold
    --upper_threshold $upper_threshold
    -o outputdir
&& mkdir details
## move per outputs that are generated for each input (outputdir/ASSEMBLER_output)
## to a joint dir (details) to make them discoverable
## also remove "ASSEMBLER." prefixes from files (otherwise the test macros don't work)
#for $i in $in_fasta
    #set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0]
    &&
    (for f in \$(find 'outputdir/'$basename'_output' -type f);
    do
        d=\$(dirname \$f | cut -d"/" -f2 | cut -d'_' -f1) &&
        mv \$f details/"\$d"_____"\$(basename \$f | sed 's/$basename\.//')";
    done)
#end for
## rename .list files to .txt files to make them detectable (format detection by extension)
## the final `true` seems needed since otherwise the `;` at the end is swallowed
&& find details/ -name "*.list" -exec mv {} {}.txt \;
&& true
]]></command>
|
|
<!-- Tool parameters. Boolean parameters carry the literal command-line flag
     as their truevalue, so the command template can insert them verbatim. -->
<inputs>
    <param name="in_fasta" type="data" format="fasta" multiple="true" label="Chromosomes/scaffolds file"/>
    <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific"/>
    <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome"/>
    <conditional name="gene_coordinates">
        <param name="use_gtf" type="select" label="Use file with gene coordinates in GTF/GFF format?" help="We recommend to use files downloaded from GENCODE or Ensembl.">
            <option value="true" selected="true">Yes</option>
            <option value="false">No</option>
        </param>
        <when value="true">
            <param name="gtf" argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file"/>
            <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label="GTF file contains genes records?"/>
            <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="GTF file contains transcripts records?"/>
        </when>
        <when value="false"/>
    </conditional>
    <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Is genome prokaryotic?"/>
    <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used"/>
    <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?"/>
    <!-- NOTE(review): the help text below says this option provides a path to
         the BUSCO lineage data, but the parameter is a boolean that emits the
         bare flag without any value. Confirm against the rnaQUAST manual. -->
    <param argument="--busco_lineage" type="boolean" truevalue="--busco_lineage" falsevalue="" checked="false" label="Run BUSCO tool?" help="The BUSCO tool detects core genes in the assembly. Use this option to provide path to the BUSCO lineage data (Eukaryota, Metazoa, Arthropoda, Vertebrata or Fungi)."/>
    <!-- GeneMarkS-T is not available in conda <param argument="\-\-gene_mark" type="boolean" truevalue="\-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?"/>-->
    <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality assessment for Meta Transcriptome"/>
    <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x_assembled/covered/matched metrics."/>
    <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x_assembled/covered/matched metrics."/>
    <!-- The two multi-selects below drive the output filters and the
         no_plots switch in the command template. -->
    <param name="out_sr" type="select" multiple="true" label="Short report formats">
        <option value="tsv" selected="true">tabular</option>
        <option value="txt">txt</option>
        <option value="tex">tex</option>
        <option value="pdf" selected="true">pdf</option>
    </param>
    <param name="out_add" type="select" multiple="true" label="Additional outputs">
        <option value="logs">Logs</option>
        <option value="plots" selected="true">Plots (only for n>1)</option>
        <option value="comparison" selected="true">Comparison for Chromosomes/scaffolds files (only for n>1)</option>
        <option value="details" selected="true">Details per Chromosomes/scaffolds file</option>
        <option value="details_plots" selected="true">Details per Chromosomes/scaffolds file as plot</option>
    </param>
</inputs>
|
|
184
|
|
<!-- Outputs: four optional short reports taken from outputdir/, plus
     discovered collections for logs, comparison files and the per-assembly
     details that the command template moves into details/. -->
<outputs>
    <data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: pdf report" from_work_dir="outputdir/short_report.pdf">
        <filter>"pdf" in out_sr</filter>
    </data>
    <data name="short_report_txt" format="txt" label="${tool.name} on ${on_string}: txt report" from_work_dir="outputdir/short_report.txt">
        <filter>"txt" in out_sr</filter>
    </data>
    <data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: tex report" from_work_dir="outputdir/short_report.tex">
        <filter>"tex" in out_sr</filter>
    </data>
    <data name="short_report_tsv" format="tabular" label="${tool.name} on ${on_string}: tsv report" from_work_dir="outputdir/short_report.tsv">
        <filter>"tsv" in out_sr</filter>
    </data>
    <collection name="list_logs" type="list" label="${tool.name} on ${on_string}: logs">
        <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.log" directory="outputdir/logs/" visible="false"/>
        <filter>"logs" in out_add</filter>
    </collection>
    <!-- The filters of the next two outputs check whether more than one
         dataset was given for in_fasta (a single input is an HDA, several
         inputs are a list of HDAs). -->
    <collection name="comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots">
        <discover_datasets ext="png" pattern="(?P&lt;name&gt;.+)\.png" directory="outputdir/comparison_output/" visible="false" recurse="true"/>
        <filter> isinstance(in_fasta, list) and "plots" in out_add</filter>
    </collection>
    <collection name="comparison" type="list" label="${tool.name} on ${on_string}: comparison">
        <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.txt" directory="outputdir/comparison_output/" visible="false" recurse="true"/>
        <filter> isinstance(in_fasta, list) and "comparison" in out_add</filter>
    </collection>
    <!-- File names in details/ have the form OUTER_____INNER.ext, as written
         by the mv step of the command template. -->
    <collection name="details" type="list:list" label="${tool.name} on ${on_string}: detailed output">
        <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;txt)" directory="details/" visible="false"/>
        <filter>"details" in out_add</filter>
    </collection>
    <collection name="details_png" type="list:list" label="${tool.name} on ${on_string}: detailed output plots">
        <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;png)" directory="details/" visible="false"/>
        <filter>"details_plots" in out_add</filter>
    </collection>
</outputs>
|
|
<tests>
    <!-- Two assemblies with reference genome and GTF: three short reports
         plus the logs, comparison_png, comparison and details collections. -->
    <test expect_num_outputs="7">
        <param name="in_fasta" value="idba.fasta,Trinity.fasta" ftype="fasta"/>
        <param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta"/>
        <conditional name="gene_coordinates">
            <param name="use_gtf" value="true"/>
            <param name="gtf" value="Saccharomyces_cerevisiae.R64-1-1.75.gtf" ftype="gtf"/>
            <param name="disable_infer_genes" value="true"/>
            <param name="disable_infer_transcripts" value="true"/>
        </conditional>
        <param name="out_sr" value="txt,tex,tsv"/>
        <param name="out_add" value="logs,comparison,plots,details"/>
        <expand macro="txt_output_test"/>
        <expand macro="tex_output_test"/>
        <expand macro="tsv_output_test"/>
        <output_collection name="comparison_png" type="list" count="15"/>
        <output_collection name="comparison" type="list" count="19"/>
        <output_collection name="list_logs" type="list" count="8"/>
        <output_collection name="details" type="list:list" count="2">
            <expand macro="details_output_test" assembler="Trinity"/>
            <expand macro="details_output_test" assembler="idba"/>
        </output_collection>
    </test>
    <!-- Single assembly without reference or GTF: four short reports plus
         the logs and details_png collections. -->
    <test expect_num_outputs="6">
        <param name="in_fasta" value="Trinity.fasta" ftype="fasta"/>
        <conditional name="gene_coordinates">
            <param name="use_gtf" value="false"/>
        </conditional>
        <param name="min_alignment" value="30"/>
        <param name="lower_threshold" value="45"/>
        <param name="upper_threshold" value="95"/>
        <param name="out_sr" value="txt,tex,tsv,pdf"/>
        <param name="out_add" value="logs,details_plots"/>

        <expand macro="pdf_output_test"/>
        <expand macro="tex_output_test"/>
        <expand macro="tsv_output_test"/>
        <expand macro="txt_output_test"/>
        <output_collection name="list_logs" type="list">
            <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text=""/>
            <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!"/>
        </output_collection>
        <output_collection name="details_png" type="list:list" count="1">
            <element name="Trinity">
                <expand macro="element_has_text" name="Nx" text="PNG"/>
                <expand macro="element_has_text" name="transcript_length" text="PNG"/>
            </element>
        </output_collection>
    </test>
</tests>
|
|
271 <help><![CDATA[
|
3
|
272 **What is rnaQUAST**
|
|
273 - a quality assessment tool for de novo transcriptome assemblies
|
|
274 - evaluating RNA-Seq assembly quality and benchmarking transcriptome assemblers using reference genome and gene database
|
|
275 - calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts
|
4
|
276 **Using rnaQuast without reference** you won't get:
|
|
277 - x-assembled (Exons)
|
|
278 - Alignments per Isoform
|
3
|
279 - x-covered (Exons)
|
|
280 - x-matched (Blocks)
|
|
281 - gmap build logs
|
0
|
282 **Using rnaQuast with reference** you will get:
|
3
|
283 - Reports
|
|
284 - Logs
|
|
285 - Alignment/Basic Metrics
|
|
286 - Misassemblies/ Specificity/ Sensitivity
|
0
|
287 - Alignment multiplicity
|
4
|
288 - Block/ Transcript Length
|
0
|
289 - Blocks per alignment
|
|
290 - Mismatch rate
|
3
|
291 - x-aligned
|
4
|
292 - Nx
|
3
|
293 - Blocks per alignment
|
|
294 - gmap build logs
|
|
295 **Using rnaQuast without gene coordinates** you won't get:
|
|
296 - x-assembled (Exons)
|
|
297 - Alignments per Isoform
|
|
298 - x-covered (Exons)
|
|
299 - x-matched (Blocks)
|
|
300 - gmap build logs
|
0
|
301 - Database Metrics
|
3
|
302 - Alignment multiplicity
|
|
303 - Mismatch rate
|
|
304 - NAx
|
4
|
305 - x-aligned
|
3
|
306 **Using rnaQuast with gene coordinates** you will get:
|
|
307 - Reports
|
|
308 - Logs
|
|
309 - Alignment/Basic Metrics
|
|
310 - Misassemblies/Specificity/Sensitivity
|
|
311 - Alignment multiplicity
|
|
312 - Block/Transcript length
|
|
313 - Blocks per alignment
|
|
314 - Mismatch rate
|
|
315 - x-aligned
|
|
316 - Nx/NAx
|
|
317 - gmap build logs
|
|
318 - Database Metrics
|
|
319 - Alignment multiplicity
|
|
320 For more information, see the citations.
|
0
|
321 ]]></help>
|
|
<!-- Publication to cite for rnaQUAST; the DOI text carries no surrounding whitespace. -->
<citations>
    <citation type="doi">10.1093/bioinformatics/btw218</citation>
</citations>
|
|
325 </tool>
|