comparison rna_quast.xml @ 4:cc0366f0bdf7 draft

Uploaded
author lehmanju
date Fri, 16 Oct 2020 06:20:35 +0000
parents bf3dc4cae5bf
children
comparison
equal deleted inserted replaced
3:bf3dc4cae5bf 4:cc0366f0bdf7
7 <assert_contents><has_line_matching expression="@EXPRESSION@"/></assert_contents> 7 <assert_contents><has_line_matching expression="@EXPRESSION@"/></assert_contents>
8 </element> 8 </element>
9 </xml> 9 </xml>
10 <xml name="element_has_text" token_name="" token_text=""> 10 <xml name="element_has_text" token_name="" token_text="">
11 <element name="@NAME@"> 11 <element name="@NAME@">
12 <assert_contents><has_text text="@TEXT@"/></assert_contents> 12 <assert_contents><has_text text="@TEXT@"/></assert_contents>
13 </element> 13 </element>
14 </xml>
15
16 <xml name="details_output_test" token_assembler="">
17 <element name="@ASSEMBLER@">
18 <element name="5000%-assembled.list"><assert_contents><has_n_lines n="0"/></assert_contents></element>
19 <element name="9500%-assembled.list"><assert_contents><has_n_lines n="0"/></assert_contents></element>
20 <expand macro="element_matching_line" name="alignment_metrics" expression="\s*== ALIGNMENT METRICS \(calculated with reference genome but without gene database\) ==\s*"/>
21 <expand macro="element_matching_line" name="alignment_multiplicity" expression="unaligned=\d+ aligned=\d+ alignments=\d+\s*"/>
22 <expand macro="element_matching_line" name="alignments_per_isoform" expression="avg=[\d.]+\s*"/>
23 <expand macro="element_matching_line" name="basic_metrics" expression="\s*== BASIC TRANSCRIPTS METRICS \(calculated without reference genome and gene database\) ==\s*"/>
24 <expand macro="element_matching_line" name="block_length" expression="avg=[\d.]+\s*"/>
25 <expand macro="element_matching_line" name="blocks_per_alignment" expression="avg=[\d.]+\s+tot=\d+\s*"/>
26 <expand macro="element_matching_line" name="database_metrics" expression="\s*== GENE DATABASE METRICS ==\s*"/>
27 <expand macro="element_matching_line" name="misassemblies" expression="\s*== ALIGNMENT METRICS FOR MISASSEMBLED \(CHIMERIC\) TRANSCRIPTS \(calculated with reference genome or with gene database\) ==\s*"/>
28 <expand macro="element_matching_line" name="mismatch_rate" expression="avg=[\d.]+\s+tot=\d+\s*"/>
29 <expand macro="element_matching_line" name="sensitivity" expression="\s*== ASSEMBLY COMPLETENESS \(SENSITIVITY\) ==\s*"/>
30 <expand macro="element_matching_line" name="specificity" expression="\s*== ASSEMBLY SPECIFICITY ==\s*"/>
31 <expand macro="element_matching_line" name="transcript_length" expression="avg=[\d.]+\s*"/>
32 <expand macro="element_matching_line" name="x-aligned" expression="avg=[\d.]+\s*"/>
33 <expand macro="element_matching_line" name="x-assembled" expression="avg=[\d.]+\s*"/>
34 <expand macro="element_matching_line" name="x-assembled_exons" expression="avg=[\d.]+\s*"/>
35 <expand macro="element_matching_line" name="x-covered" expression="avg=[\d.]+\s*"/>
36 <expand macro="element_matching_line" name="x-covered_exons" expression="avg=[\d.]+\s*"/>
37 <expand macro="element_matching_line" name="x-matched" expression="avg=[\d.]+\s*"/>
38 <expand macro="element_matching_line" name="x-matched_blocks" expression="avg=[\d.]+\s*"/>
39 </element>
40 </xml>
41
42 <xml name="txt_output_test" token_assembler="">
43 <output name="short_report_txt">
44 <assert_contents>
45 <has_text text="SHORT SUMMARY REPORT"/>
46 </assert_contents>
47 </output>
48 </xml>
49 <xml name="tex_output_test" token_assembler="">
50 <output name="short_report_tex">
51 <assert_contents>
52 <has_text text="Short summary report"/>
53 <has_text text="end{document}"/>
54 </assert_contents>
55 </output>
56 </xml>
57 <xml name="tsv_output_test" token_assembler="">
58 <output name="short_report_tsv">
59 <assert_contents>
60 <has_line_matching expression="^METRICS/TRANSCRIPTS\t.+$"/>
61 </assert_contents>
62 </output>
63 </xml>
64 <xml name="pdf_output_test" token_assembler="">
65 <output name="short_report_pdf">
66 <assert_contents>
67 <has_text text="rnaQUAST short report"/>
68 </assert_contents>
69 </output>
14 </xml> 70 </xml>
15 </macros> 71 </macros>
16 <requirements> 72 <requirements>
17 <requirement type="package" version="@TOOL_VERSION@">rnaquast</requirement> 73 <requirement type="package" version="@TOOL_VERSION@">rnaquast</requirement>
18 </requirements> 74 </requirements>
19 <stdio> 75 <stdio>
20 <regex match="Traceback " source="both" level="fatal" description="rnaQuast failed" /> 76 <regex match="Traceback " source="both" level="fatal" description="rnaQuast failed" />
21 </stdio> 77 </stdio>
22 <command detect_errors="exit_code"><![CDATA[ 78 <command detect_errors="exit_code"><![CDATA[
23 #import re 79 #import re
24 #for $i in $input 80 #for $i in $in_fasta
25 ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' && 81 ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' &&
26 #end for 82 #end for
27 #if $r 83 #if $r
28 #for $rf in $r 84 #for $rf in $r
29 ln -s '$rf' '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' && 85 ln -s '$rf' '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' &&
36 #end if 92 #end if
37 mkdir outputdir && 93 mkdir outputdir &&
38 rnaQUAST.py 94 rnaQUAST.py
39 --threads \${GALAXY_SLOTS:-1} 95 --threads \${GALAXY_SLOTS:-1}
40 --transcripts 96 --transcripts
41 #for $i in $input 97 #for $i in $in_fasta
42 '${re.sub('[^\w\-.]', '_', i.element_identifier)}' 98 '${re.sub('[^\w\-.]', '_', i.element_identifier)}'
43 #end for 99 #end for
44 $strand_specific 100 $strand_specific
45 #if $r 101 #if $r
46 -r 102 -r
61 #if "pdf" not in $out_sr and "plots" not in $out_add 117 #if "pdf" not in $out_sr and "plots" not in $out_add
62 --no_plots 118 --no_plots
63 #end if 119 #end if
64 $blat 120 $blat
65 $busco_lineage 121 $busco_lineage
66 $gene_mark 122 ##GeneMarkS-T is not available in conda $gene_mark
123 $meta
67 --lower_threshold $lower_threshold 124 --lower_threshold $lower_threshold
68 --upper_threshold $upper_threshold 125 --upper_threshold $upper_threshold
69 -o outputdir 126 -o outputdir
70 && mkdir details 127 && mkdir details
71 #for $i in $input 128 ## move the outputs that are generated for each input (outputdir/ASSEMBLER_output)
129 ## to a joint dir (details) to make them discoverable
130 ## also remove "ASSEMBLER." prefixes from files (otherwise the test macros don't work)
131 #for $i in $in_fasta
72 #set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0] 132 #set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0]
73 && 133 &&
74 (for f in \$(find 'outputdir/'$basename'_output' -type f); 134 (for f in \$(find 'outputdir/'$basename'_output' -type f);
75 do 135 do
76 d=\$(dirname \$f | cut -d"/" -f2 | cut -d'_' -f1) && 136 d=\$(dirname \$f | cut -d"/" -f2 | cut -d'_' -f1) &&
77 mv \$f details/"\$d"_____"\$(basename \$f)"; 137 mv \$f details/"\$d"_____"\$(basename \$f | sed 's/$basename\.//')";
78 done) 138 done)
79 #end for 139 #end for
80 ## rename .list files to .txt files to make them detectable (format detection by extension) 140 ## rename .list files to .txt files to make them detectable (format detection by extension)
81 ## the final `true` seems needed since otherwise the `;` at the end is swallowed 141 ## the final `true` seems needed since otherwise the `;` at the end is swallowed
82 && find details/ -name "*.list" -exec mv {} {}.txt \; 142 && find details/ -name "*.list" -exec mv {} {}.txt \;
83 && true 143 && true
84 ]]></command> 144 ]]></command>
85 <inputs> 145 <inputs>
86 <param name="input" type="data" format="fasta" multiple="true" label="Chromosomes/scaffolds file"/> 146 <param name="in_fasta" type="data" format="fasta" multiple="true" label="Chromosomes/scaffolds file"/>
87 <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific"/> 147 <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific"/>
88 <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" /> 148 <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" />
89 <conditional name="gene_coordinates"> 149 <conditional name="gene_coordinates">
90 <param name="use_gtf" type="select" label="Use file with gene coordinates in GTF/GFF format?" help="We recommend to use files downloaded from GENCODE or Ensembl."> 150 <param name="use_gtf" type="select" label="Use file with gene coordinates in GTF/GFF format?" help="We recommend to use files downloaded from GENCODE or Ensembl.">
91 <option value="true" selected="true">Yes</option> 151 <option value="true" selected="true">Yes</option>
101 </conditional> 161 </conditional>
102 <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Is genome prokaryotic?"/> 162 <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Is genome prokaryotic?"/>
103 <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used"/> 163 <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used"/>
104 <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" /> 164 <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" />
105 <param argument="--busco_lineage" type="boolean" truevalue="--busco_lineage" falsevalue="" checked="false" label="Run BUSCO tool?" help="The BUSCO tool detects core genes in the assembly. Use this option to provide path to the BUSCO lineage data (Eukaryota, Metazoa, Arthropoda, Vertebrata or Fungi)."/> 165 <param argument="--busco_lineage" type="boolean" truevalue="--busco_lineage" falsevalue="" checked="false" label="Run BUSCO tool?" help="The BUSCO tool detects core genes in the assembly. Use this option to provide path to the BUSCO lineage data (Eukaryota, Metazoa, Arthropoda, Vertebrata or Fungi)."/>
106 <param argument="--gene_mark" type="boolean" truevalue="--gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?"/> 166 <!-- GeneMarkS-T is not available in conda <param argument="\-\-gene_mark" type="boolean" truevalue="\-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?"/>-->
167 <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality assessment for Meta Transcriptome"/>
107 <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x_assembled/covered/matched metrics."/> 168 <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x_assembled/covered/matched metrics."/>
108 <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x_assembled/covered/matched metrics."/> 169 <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x_assembled/covered/matched metrics."/>
109 <param name="out_sr" type="select" multiple="true" label="Short report formats"> 170 <param name="out_sr" type="select" multiple="true" label="Short report formats">
110 <option value="tsv" selected="true">tabular</option> 171 <option value="tsv" selected="true">tabular</option>
111 <option value="txt">txt</option> 172 <option value="txt">txt</option>
136 </data> 197 </data>
137 <collection name="list_logs" type="list" label="${tool.name} on ${on_string}: logs" > 198 <collection name="list_logs" type="list" label="${tool.name} on ${on_string}: logs" >
138 <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.log" directory="outputdir/logs/" visible="false" /> 199 <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.log" directory="outputdir/logs/" visible="false" />
139 <filter>"logs" in out_add</filter> 200 <filter>"logs" in out_add</filter>
140 </collection> 201 </collection>
202 <!-- note the output filter of the next two outputs checks if there is
203 more than 1 input for in_fasta (for 1 it's an HDA, for more a list of HDAs) -->
141 <collection name="comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots" > 204 <collection name="comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots" >
142 <discover_datasets ext="png" pattern="(?P&lt;name&gt;.+)\.png" directory="outputdir/comparison_output/" visible="false" recurse="true"/> 205 <discover_datasets ext="png" pattern="(?P&lt;name&gt;.+)\.png" directory="outputdir/comparison_output/" visible="false" recurse="true"/>
143 <filter> len(input)>1 and "plots" in out_add</filter> 206 <filter> isinstance(in_fasta, list) and "plots" in out_add</filter>
144 </collection> 207 </collection>
145 <collection name="comparison" type="list" label="${tool.name} on ${on_string}: comparison" > 208 <collection name="comparison" type="list" label="${tool.name} on ${on_string}: comparison" >
146 <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.txt" directory="outputdir/comparison_output/" visible="false" recurse="true" /> 209 <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.txt" directory="outputdir/comparison_output/" visible="false" recurse="true" />
147 <filter> len(input)>1 and "comparison" in out_add</filter> 210 <filter> isinstance(in_fasta, list) and "comparison" in out_add</filter>
148 </collection> 211 </collection>
149 <collection name="details" type="list:list" label="${tool.name} on ${on_string}: detailed output"> 212 <collection name="details" type="list:list" label="${tool.name} on ${on_string}: detailed output">
150 <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;txt)" directory="details/" visible="false"/> 213 <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;txt)" directory="details/" visible="false"/>
151 <filter>"details" in out_add</filter> 214 <filter>"details" in out_add</filter>
152 </collection> 215 </collection>
155 <filter>"details_plots" in out_add</filter> 218 <filter>"details_plots" in out_add</filter>
156 </collection> 219 </collection>
157 </outputs> 220 </outputs>
158 <tests> 221 <tests>
159 <test expect_num_outputs="7"> 222 <test expect_num_outputs="7">
160 <param name="input" value="idba.fasta,Trinity.fasta" ftype="fasta" /> 223 <param name="in_fasta" value="idba.fasta,Trinity.fasta" ftype="fasta" />
161 <param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta" /> 224 <param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta" />
162 <conditional name="gene_coordinates"> 225 <conditional name="gene_coordinates">
163 <param name="use_gtf" value="true" /> 226 <param name="use_gtf" value="true" />
164 <param name="gtf" value="Saccharomyces_cerevisiae.R64-1-1.75.gtf" ftype="gtf" /> 227 <param name="gtf" value="Saccharomyces_cerevisiae.R64-1-1.75.gtf" ftype="gtf" />
165 <param name="disable_infer_genes" value="true"/> 228 <param name="disable_infer_genes" value="true"/>
166 <param name="disable_infer_transcripts" value="true"/> 229 <param name="disable_infer_transcripts" value="true"/>
167 </conditional> 230 </conditional>
168 <param name="out_sr" value="txt,tex,tsv" /> 231 <param name="out_sr" value="txt,tex,tsv" />
169 <param name="out_add" value="logs,comparison,plots,details" /> 232 <param name="out_add" value="logs,comparison,plots,details" />
170 <output name="short_report_txt"> 233 <expand macro="txt_output_test"/>
171 <assert_contents> 234 <expand macro="tex_output_test"/>
172 <has_text text="SHORT SUMMARY REPORT"/> 235 <expand macro="tsv_output_test"/>
173 </assert_contents>
174 </output>
175 <output name="short_report_tex">
176 <assert_contents>
177 <has_text text="Short summary report"/>
178 <has_text text="end{document}"/>
179 </assert_contents>
180 </output>
181 <output name="short_report_tsv">
182 <assert_contents>
183 <has_line_matching expression="^METRICS/TRANSCRIPTS\tidba\tTrinity$"/>
184 </assert_contents>
185 </output>
186 <output_collection name="comparison_png" type="list" count="15"/> 236 <output_collection name="comparison_png" type="list" count="15"/>
187 <output_collection name="comparison" type="list" count="19"/> 237 <output_collection name="comparison" type="list" count="19"/>
188 <output_collection name="list_logs" type="list" count="8"/> 238 <output_collection name="list_logs" type="list" count="8"/>
189 <output_collection name="details" type="list:list" count="2"> 239 <output_collection name="details" type="list:list" count="2">
190 <output_collection name="Trinity" type="list" count="21"/> 240 <expand macro="details_output_test" assembler="Trinity"/>
191 <output_collection name="idba" type="list" count="21"/> 241 <expand macro="details_output_test" assembler="idba"/>
192 </output_collection> 242 </output_collection>
193 </test> 243 </test>
194 <test expect_num_outputs="8"> 244 <test expect_num_outputs="6">
195 <param name="input" value="Trinity.fasta" ftype="fasta" /> 245 <param name="in_fasta" value="Trinity.fasta" ftype="fasta" />
196 <conditional name="gene_coordinates"> 246 <conditional name="gene_coordinates">
197 <param name="use_gtf" value="false" /> 247 <param name="use_gtf" value="false" />
198 </conditional> 248 </conditional>
199 <param name="min_alignment" value="30" /> 249 <param name="min_alignment" value="30" />
200 <param name="lower_threshold" value="45" /> 250 <param name="lower_threshold" value="45" />
201 <param name="upper_threshold" value="95"/> 251 <param name="upper_threshold" value="95"/>
202 <param name="out_sr" value="txt,tex,tsv,pdf" /> 252 <param name="out_sr" value="txt,tex,tsv,pdf" />
203 <param name="out_add" value="logs,details_plots" /> 253 <param name="out_add" value="logs,details_plots" />
204 <output name="short_report_pdf" file="short_report.pdf" compare="sim_size"/> 254
205 <output name="short_report_txt" file="short_report.txt" compare="sim_size"/> 255 <expand macro="pdf_output_test"/>
206 <output name="short_report_tex" file="short_report.tex" compare="sim_size"/> 256 <expand macro="tex_output_test"/>
207 <output name="short_report_tsv" file="short_report.tsv" compare="sim_size"/> 257 <expand macro="tsv_output_test"/>
258 <expand macro="txt_output_test"/>
208 <output_collection name="list_logs" type="list"> 259 <output_collection name="list_logs" type="list">
209 <element name="rnaQUAST" file="rnaQUAST"/> 260 <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text=""/>
210 <element name="Trinity.GeneMarkS_T.err" file="spades.311.GeneMarkS_T.err"/> 261 <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!"/>
211 </output_collection> 262 </output_collection>
212 <output_collection name="details_png" type="list:list" count="1"> 263 <output_collection name="details_png" type="list:list" count="1">
213 <output_collection name="Trinity" type="list" count="11"/> 264 <element name="Trinity">
265 <expand macro="element_has_text" name="Nx" text="PNG"/>
266 <expand macro="element_has_text" name="transcript_length" text="PNG"/>
267 </element>
214 </output_collection> 268 </output_collection>
215 </test> 269 </test>
216 </tests> 270 </tests>
217 <help><![CDATA[ 271 <help><![CDATA[
218 **What is rnaQUAST** 272 **What is rnaQUAST**
219 - a quality assessment tool for de novo transcriptome assemblies 273 - a quality assessment tool for de novo transcriptome assemblies
220 - evaluating RNA-Seq assembly quality and benchmarking transcriptome assemblers using reference genome and gene database 274 - evaluating RNA-Seq assembly quality and benchmarking transcriptome assemblers using reference genome and gene database
221 - calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts 275 - calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts
222 276 **Using rnaQuast without reference** you won't get:
223 **Using rnaQuast without reference** you won't get: 277 - x-assembled (Exons)
224 278 - Alignments per Isoform
225 - x-assembled (Exons)
226 - Alignments per Isoform
227 - x-covered (Exons) 279 - x-covered (Exons)
228 - x-matched (Blocks) 280 - x-matched (Blocks)
229 - gmap build logs 281 - gmap build logs
230
231 **Using rnaQuast with reference** you will get: 282 **Using rnaQuast with reference** you will get:
232 - Reports 283 - Reports
233 - Logs 284 - Logs
234 - Alignment/Basic Metrics 285 - Alignment/Basic Metrics
235 - Misassemblies/ Specificity/ Sensitivity 286 - Misassemblies/ Specificity/ Sensitivity
236 - Alignment multiplicity 287 - Alignment multiplicity
237 - Block/ Transcript Length 288 - Block/ Transcript Length
238 - Blocks per alignment 289 - Blocks per alignment
239 - Mismatch rate 290 - Mismatch rate
240 - x-aligned 291 - x-aligned
241 - Nx 292 - Nx
242 - Blocks per alignment 293 - Blocks per alignment
243 - gmap build logs 294 - gmap build logs
244
245 **Using rnaQuast without gene coordinates** you won't get: 295 **Using rnaQuast without gene coordinates** you won't get:
246 - x-assembled (Exons) 296 - x-assembled (Exons)
247 - Alignments per Isoform 297 - Alignments per Isoform
248 - x-covered (Exons) 298 - x-covered (Exons)
249 - x-matched (Blocks) 299 - x-matched (Blocks)
250 - gmap build logs 300 - gmap build logs
251 - Database Metrics 301 - Database Metrics
252 - Alignment multiplicity 302 - Alignment multiplicity
253 - Mismatch rate 303 - Mismatch rate
254 - NAx 304 - NAx
255 - x-aligned 305 - x-aligned
256 **Using rnaQuast with gene coordinates** you will get: 306 **Using rnaQuast with gene coordinates** you will get:
257 - Reports 307 - Reports
258 - Logs 308 - Logs
259 - Alignment/Basic Metrics 309 - Alignment/Basic Metrics
260 - Misassemblies/Specificity/Sensitivity 310 - Misassemblies/Specificity/Sensitivity