comparison quast.xml @ 0:5367786dc871 draft default tip

Uploaded
author greg
date Tue, 14 Mar 2023 15:21:14 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5367786dc871
1 <tool id="quast" name="Quast" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>Genome assembly Quality</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="bio_tools"/>
7 <expand macro='requirements' />
8 <command detect_errors="exit_code">
9 <![CDATA[
10 #import re
11 #import os
12
13 #if str($in.custom) == 'false'
14 #set $labels = ','.join( [re.sub('[^\w\-_]', '_', str($x.element_identifier)) for $x in $in.inputs])
15 echo $labels &&
16 #else
17 #set $labels = []
18 #for $x in $in.inputs
19 #if str($x.labels) != ''
20 #silent $labels.append(re.sub('[^\w\-_]', '_', str($x.labels)))
21 #else
22 #silent $labels.append(re.sub('[^\w\-_]', '_', str($x.input.element_identifier)))
23 #end if
24 #end for
25 #set $labels = ','.join($labels)
26 #end if
27
28 #if $assembly.type == 'metagenome' and $assembly.ref.origin == 'list'
29 #set $temp_ref_list_fp = 'temp_ref_list'
30 #for $i in str($assembly.ref.references_list).split(',')
31 echo '${i.strip()}' >> '$temp_ref_list_fp' &&
32 #end for
33 #end if
34
35 #if $reads.reads_option == 'paired'
36 #for $read in $reads.input_1
37 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($read.element_identifier))
38 ln -s '$read' 'pe1-${identifier}.${read.ext}' &&
39 #end for
40 #for $read in $reads.input_2
41 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($read.element_identifier))
42 ln -s '$read' 'pe2-${identifier}.${read.ext}' &&
43 #end for
44 #else if $reads.reads_option == 'paired_collection'
45 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($reads.input_1.element_identifier))
46 ln -s '$reads.input_1.forward' 'pe1-${identifier}.${reads.input_1.forward.ext}' &&
47 ln -s '$reads.input_1.reverse' 'pe2-${identifier}.${reads.input_1.reverse.ext}' &&
48 #end if
49
50 #if $assembly.type == 'genome'
51 quast
52 #else
53 metaquast
54 #end if
55
56 #if $reads.reads_option == 'single'
57 #for $read in $reads.input_1
58 --single '$read'
59 #end for
60 #else if $reads.reads_option == 'paired'
61 #for $read in $reads.input_1
62 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($read.element_identifier))
63 --pe1 'pe1-${identifier}.${read.ext}'
64 #end for
65 #for $read in $reads.input_2
66 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($read.element_identifier))
67 --pe2 'pe2-${identifier}.${read.ext}'
68 #end for
69 #else if $reads.reads_option == 'paired_collection'
70 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($reads.input_1.element_identifier))
71 --pe1 'pe1-${identifier}.${reads.input_1.forward.ext}'
72 --pe2 'pe2-${identifier}.${reads.input_1.reverse.ext}'
73 #else if $reads.reads_option == 'paired_interlaced'
74 #for $read in $reads.input_1
75 --pe12 '$read'
76 #end for
77 #else if $reads.reads_option == 'mate_paired'
78 #for $read in $reads.input_1
79 --mp1 '$read'
80 #end for
81 #for $read in $reads.input_2
82 --mp2 '$read'
83 #end for
84 #else if $reads.reads_option == 'pacbio'
85 #for $read in $reads.input_1
86 --pacbio '$read'
87 #end for
88 #else if $reads.reads_option == 'nanopore'
89 #for $read in $reads.input_1
90 --nanopore '$read'
91 #end for
92 #end if
93
94 --labels '$labels'
95 -o 'outputdir'
96
97 #if $assembly.type == 'genome'
98 #if $assembly.ref.use_ref == 'true'
99 #if $assembly.ref.reference_source.reference_source_selector == 'history':
100 -r '$assembly.ref.reference_source.r'
101 #else:
102 -r '$assembly.ref.reference_source.r.fields.path'
103 #end if
104 #if $assembly.ref.features
105 --features '$assembly.ref.features'
106 #end if
107 #if $assembly.ref.operons
108 --operons '$assembly.ref.operons'
109 #end if
110 $assembly.ref.circos
111 $assembly.ref.k_mer.k_mer_stats
112 #if str($assembly.ref.k_mer.k_mer_stats) != ''
113 --k-mer-size $assembly.ref.k_mer.k_mer_size
114 #end if
115 #else if $assembly.ref.est_ref_size
116 --est-ref-size $assembly.ref.est_ref_size
117 #end if
118 $assembly.orga_type
119 #else if $assembly.type == 'metagenome'
120 #if $assembly.ref.origin == 'history'
121 -r '$assembly.ref.r'
122 #else if $assembly.ref.origin == 'list'
123 --references-list '$temp_ref_list_fp'
124 #else if $assembly.ref.origin == 'silva'
125 --test-no-ref
126 --max-ref-num $assembly.ref.max_ref_num
127 #end if
128 $assembly.reuse_combined_alignments
129 #end if
130
131 --min-identity $assembly.min_identity
132 --min-contig $min_contig
133 $split_scaffolds
134 $large
135
136 #if str($genes.gene_finding.tool) != 'none'
137 $genes.gene_finding.tool
138 #if str($genes.gene_finding.tool) in ('--gene-finding', '--glimmer')
139 #set $gene_threshold = ','.join([x.strip() for x in str($genes.gene_finding.gene_thresholds).split(',')])
140 --gene-thresholds '$gene_threshold'
141 #end if
142 #end if
143
144 $genes.rna_finding
145 $genes.conserved_genes_finding
146 $alignments.use_all_alignments
147 --min-alignment $alignments.min_alignment
148 --ambiguity-usage '$alignments.ambiguity_usage'
149 --ambiguity-score $alignments.ambiguity_score
150 $alignments.fragmented
151 $alignments.upper_bound_assembly
152 #if $alignments.upper_bound_min_con
153 --upper-bound-min-con $alignments.upper_bound_min_con
154 #end if
155 #if $alignments.local_mis_size
156 --local-mis-size $alignments.local_mis_size
157 #end if
158
159 #if $alignments.fragmented
160 #if $advanced.fragmented_max_indent != ''
161 --fragmented-max-indent $advanced.fragmented_max_indent
162 #end if
163 #end if
164
165 #set $contig_thresholds = ','.join([x.strip() for x in str($advanced.contig_thresholds).split(',')])
166 --contig-thresholds '$contig_thresholds'
167 $advanced.strict_NA
168 --extensive-mis-size $advanced.extensive_mis_size
169 --scaffold-gap-max-size $advanced.scaffold_gap_max_size
170 --unaligned-part-size $advanced.unaligned_part_size
171 $advanced.skip_unaligned_mis_contigs
172 $advanced.report_all_metrics
173 --x-for-Nx $advanced.x_for_Nx
174
175 #if str($in.custom) == 'false'
176 #for $k in $in.inputs
177 '$k'
178 #end for
179 #else
180 #for $k in $in.inputs
181 '$k.input'
182 #end for
183 #end if
184 --threads \${GALAXY_SLOTS:-1}
185
186 #if $assembly.type == 'genome'
187 && mkdir -p '$report_html.files_path'
188 && cp outputdir/*.html '$report_html.files_path'
189 #if $assembly.ref.use_ref
190 && cp -R outputdir/icarus_viewers '$report_html.files_path'
191 #end if
192 #else
193 && if [[ -f "outputdir/report.tsv" ]]; then mkdir -p "outputdir/combined_reference/" && cp "outputdir/report.tsv" "outputdir/combined_reference/report.tsv"; fi
194 && if [[ -f "outputdir/report.html" ]]; then mkdir -p "outputdir/combined_reference/" && cp outputdir/*.html "outputdir/combined_reference/"; fi
195 && mkdir -p '$report_html_meta.files_path'
196 && cp outputdir/combined_reference/*.html '$report_html_meta.files_path'
197 && if [[ -d "outputdir/icarus_viewers" ]]; then cp -R outputdir/icarus_viewers 'outputdir/combined_reference/'; fi
198 && if [[ -d "outputdir/combined_reference/icarus_viewers" ]]; then cp -R outputdir/combined_reference/icarus_viewers '$report_html_meta.files_path'; fi
199 && if [[ -d "outputdir/krona_charts/" ]]; then mkdir -p '$krona.files_path' && cp outputdir/krona_charts/*.html '$krona.files_path'; fi
200 #end if
201 ]]></command>
202 <inputs>
203 <conditional name="in">
204 <param name="custom" type="select" label="Use customized names for the input files?" help="They will be used in reports, plots and logs">
205 <option value="true">Yes, specify custom names</option>
206 <option value="false" selected="true">No, use dataset names</option>
207 </param>
208 <when value="true">
209 <repeat name="inputs" title="Contigs/scaffolds" min="1">
210 <param name="input" type="data" format="fasta" label="Contigs/scaffolds file"/>
211 <param argument="--labels" type="text" value="" label="Name"/>
212 </repeat>
213 </when>
214 <when value="false">
215 <param name="inputs" type="data" format="fasta" multiple="true" label="Contigs/scaffolds file"/>
216 </when>
217 </conditional>
218 <conditional name="reads">
219 <param name="reads_option" type="select" label="Reads options" help="Currently, the supported read types are Illumina unpaired, paired-end and mate-pair reads, PacBio SMRT, and Oxford Nanopore long reads.">
220 <option value="disabled">Disabled</option>
221 <option value="single">Illumina single-end reads</option>
222 <option value="paired">Illumina paired-end reads</option>
223 <option value="paired_collection">Illumina paired-end reads in paired collection</option>
224 <option value="paired_interlaced">Illumina interlaced paired-end reads</option>
225 <option value="mate_paired">Illumina mate-pair reads</option>
226 <option value="pacbio">Pacbio SMRT reads</option>
227 <option value="nanopore">Nanopore reads</option>
228 </param>
229 <when value="disabled"/>
230 <when value="single">
231 <param name="input_1" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file" />
232 </when>
233 <when value="paired">
234 <param name="input_1" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file #1" />
235 <param name="input_2" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file #2" />
236 </when>
237 <when value="paired_collection">
238 <param name="input_1" type="data_collection" collection_type="paired" format="fastq,fastq.gz,fasta,fasta.gz" label="FASTQ/FASTA files" />
239 </when>
240 <when value="paired_interlaced">
241 <param name="input_1" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file" />
242 </when>
243 <when value="mate_paired">
244 <param name="input_1" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file #1" />
245 <param name="input_2" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file #2" />
246 </when>
247 <when value="pacbio">
248 <param name="input_1" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file" />
249 </when>
250 <when value="nanopore">
251 <param name="input_1" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file" />
252 </when>
253 </conditional>
254 <conditional name="assembly">
255 <param name="type" type="select" label="Type of assembly">
256 <option value="genome">Genome</option>
257 <option value="metagenome">Metagenome</option>
258 </param>
259 <when value="genome">
260 <conditional name="ref">
261 <param name="use_ref" type="select" label="Use a reference genome?" help="Many metrics can't be evaluated without a reference. If this is omitted, QUAST will only report the metrics that can be evaluated without a reference.">
262 <option value="true">Yes</option>
263 <option value="false" selected="true">No</option>
264 </param>
265 <when value="true">
266 <conditional name="reference_source">
267 <param name="reference_source_selector" type="select" label="Select a reference genome from your history or select a cached reference genome?">
268 <option value="cached">Select a cached reference genome</option>
269 <option value="history">Select a reference genome from the history and build the index</option>
270 </param>
271 <when value="cached">
272 <param argument="-r" type="select" label="Using reference genome" help="Select genome from the list">
273 <options from_data_table="all_fasta">
274 <filter type="sort_by" column="2"/>
275 <validator type="no_options" message="No reference genomes are available"/>
276 </options>
277 <validator type="no_options" message="A cached reference genome is not available for the build associated with the selected input file"/>
278 </param>
279 </when>
280 <when value="history">
281 <param argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" />
282 </when>
283 </conditional>
284 <param argument="--features" type="data" format="gff, gff3, bed" optional="true" label="Genomic feature positions in the reference genome" help="Gene coordinates for the reference genome"/>
285 <param argument="--operons" type="data" format="gff, gff3, bed" optional="true" label="Operon positions in the reference genome" help="Operon coordinates for the reference genome"/>
286 <conditional name="k_mer">
287 <param argument="--k-mer-stats" type="select" label="Compute k-mer-based quality metrics?" help="It is recommended for large genomes. This may significantly increase memory and time consumption on large genomes">
288 <option value="--k-mer-stats">Yes</option>
289 <option value="" selected="true">No</option>
290 </param>
291 <when value="--k-mer-stats">
292 <param name="k_mer_size" argument="--k-mer-size" type="integer" value="101" label="Size of k" />
293 </when>
294 <when value=""/>
295 </conditional>
296 <param argument="--circos" type="boolean" truevalue="--circos" falsevalue="" checked="false" label="Generate Circos plot" help="Plot Circos version of Icarus contig alignment viewer"/>
297 </when>
298 <when value="false">
299 <param argument="--est-ref-size" type="integer" optional="true" label="Estimated reference genome size (in bp) for computing NGx statistics" help=""/>
300 </when>
301 </conditional>
302 <param name="orga_type" type="select" label="Type of organism">
303 <option value="">Prokaryotes: use of GeneMarkS for gene finding (default)</option>
304 <option value="--eukaryote">Eukaryote: use of GeneMark-ES for gene finding, Barrnap for ribosomal RNA genes prediction, BUSCO for conserved orthologs finding (--eukaryote)</option>
305 <option value="--fungus">Fungus: use of GeneMark-ES for gene finding, Barrnap for ribosomal RNA genes prediction, BUSCO for conserved orthologs finding (--fungus)</option>
306 </param>
307 <expand macro="min_identity_macros" value="95"/>
308 </when>
309 <when value="metagenome">
310 <conditional name="ref">
311 <param name="origin" type="select" label="Reference genome" help="Many metrics can't be evaluated without a reference. If this is omitted, QUAST will only report the metrics that can be evaluated without a reference.">
312 <option value="history">From history</option>
313 <option value="list">From list</option>
314 <option value="silva">From SILVA database</option>
315 <option value="none" selected="true">None</option>
316 </param>
317 <when value="history">
318 <param argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" />
319 </when>
320 <when value="list">
321 <param name="references_list" argument="--references-list" type="text" value="" label="Comma-separated list of reference genomes" help="MetaQUAST will search for these references in the NCBI database and will download the found ones"/>
322 </when>
323 <when value="silva">
324 <param name="max_ref_num" argument="--max-ref-num" type="integer" value="50" label="Maximum number of reference genomes (per each assembly) to download after searching in the SILVA database" />
325 </when>
326 <when value="none"/>
327 </conditional>
328 <param argument="--reuse-combined-alignments" type="boolean" truevalue="--reuse-combined-alignments" falsevalue="" checked="false" label="Reuse the alignments on the combined reference" help="Reuse the alignments on the combined reference in the subsequent runs per separate references. That is, the alignment procedure is performed only once (for all assemblies against the combined reference) and is NOT executed for each subgroup of contigs against the corresponding separate reference genomes. In each separate reference run, all precomputed assembly alignments for other references are simply ignored" />
329 <expand macro="min_identity_macros" value="90"/>
330 </when>
331 </conditional>
332 <param argument="--min-contig" type="integer" value="500" label="Lower threshold for a contig length (in bp)" help="Shorter contigs won't be taken into account"/>
333 <param argument="--split-scaffolds" type="boolean" truevalue="--split-scaffolds" falsevalue="" checked="false" label="Are assemblies scaffolds rather than contigs?" help="QUAST will add split versions of assemblies to the comparison. Assemblies are split by continuous fragments of N's of length >= 10. If broken version is equal to the original assembly (i.e. nothing was split) it is not included in the comparison."/>
334 <param argument="--large" type="boolean" truevalue="--large" falsevalue="" checked="false" label="Is genome large (> 100 Mbp)?" help="Use optimal parameters for evaluation of large genomes. Affects speed and accuracy. In particular, imposes --eukaryote --min-contig 3000 --min-alignment 500 --extensive-mis-size 7000 (can be overridden manually with the corresponding options). In addition, this mode tries to identify misassemblies caused by transposable elements and exclude them from the number of misassemblies."/>
335 <section name="genes" title="Genes">
336 <conditional name="gene_finding">
337 <param name="tool" type="select" label="Tool for gene prediction" help="">
338 <option value="none">Don't predict genes</option>
339 <option value="--gene-finding">GeneMarkS if prokaryotes or GeneMark-ES if eukaryotes or fungi</option>
340 <option value="--mgm">MetaGeneMark, specially for metagenomic assembly</option>
341 <option value="--glimmer">Glimmer</option>
342 </param>
343 <when value="none"/>
344 <when value="--gene-finding">
345 <expand macro="gene_thresholds"/>
346 </when>
347 <when value="--mgm"/>
348 <when value="--glimmer">
349 <expand macro="gene_thresholds"/>
350 </when>
351 </conditional>
352 <param argument="--rna-finding" type="boolean" truevalue="--rna-finding" falsevalue="" checked="false" label="Enables ribosomal RNA gene finding?" help="By default, we assume that the genome is prokaryotic, and Barrnap uses the bacterial database for rRNA prediction. If the genome is eukaryotic (fungal), use --eukaryote (--fungus) option to force Barrnap to work with the eukaryotic (fungal) database. "/>
353 <param argument="--conserved-genes-finding" type="boolean" truevalue="--conserved-genes-finding" falsevalue="" checked="false" label="Enables search for Universal Single-Copy Orthologs using BUSCO?" help="By default, we assume that the genome is prokaryotic, and BUSCO uses the bacterial database of orthologs. If the genome is eukaryotic (fungal), use --eukaryote (--fungus) option to force BUSCO to work with the eukaryotic (fungal) database. "/>
354 </section>
355 <section name="alignments" title="Alignments">
356 <param argument="--use-all-alignments" type="boolean" truevalue="--use-all-alignments" falsevalue="" checked="false" label="Use all alignments as in QUAST v.1.*. to compute genome fraction, # genomic features, # operons metrics?" help="By default, QUAST v.2.0 and higher filters out ambiguous and redundant alignments, keeping only one alignment per contig (or one set of non-overlapping or slightly overlapping alignments)"/>
357 <param argument="--min-alignment" type="integer" value="65" label="Minimum length of alignment" help="Alignments shorter than this value will be filtered. Note that all alignments shorter than 65 bp will be filtered regardless of this threshold."/>
358 <param argument="--ambiguity-usage" type="select" label="How to process equally good alignments of a contig (probably repeats)?" help="">
359 <option value="none">Skip all such alignments</option>
360 <option value="one" selected="true">Take only one (the very best one)</option>
361 <option value="all">Use all alignments. It can cause a significant increase of # mismatches (repeats are almost always inexact due to accumulated SNPs, indels, etc.). It is useful for metagenomic assemblies where ambiguous alignments might represent homologous sequences of different strains</option>
362 </param>
363 <param argument="--ambiguity-score" type="float" value="0.99" min="0.8" max="1.0" label="Score S for defining equally good alignments of a single contig" help="All alignments are sorted by decreasing LEN × IDY% value. All alignments with LEN × IDY% less than S × best(LEN × IDY%) are discarded. "/>
364 <param argument="--fragmented" type="boolean" truevalue="--fragmented" falsevalue="" checked="false" label="Fragmented reference genome" help="Reference genome is fragmented (e.g. a scaffold reference). QUAST will try to detect misassemblies caused by the fragmentation and mark them fake (will be excluded from misassemblies). Note: QUAST will not detect misassemblies caused by the linear representation of circular genome "/>
365 <param argument="--upper-bound-assembly" type="boolean" truevalue="--upper-bound-assembly" falsevalue="" label="Simulate upper bound assembly" help="Simulate upper bound assembly based on the reference genome and a given set reads (mate-pairs or long reads, such as Pacbio SMRT/Oxford Nanopore, are REQUIRED). This assembly is added to the comparison and could be useful for estimating the upper bounds of completeness and contiguity that theoretically can be reached by assembly software from this particular set of reads. The concept is based on the fact that the reference genome cannot be completely reconstructed from raw reads due to long genomic repeats and low covered regions." />
366 <param argument="--upper-bound-min-con" type="integer" value="" optional="true" label="Minimal number of 'connecting reads' needed for joining upper bound contigs into a scaffold" help="This is important for a realistic estimation of genome assembly fragmentation due to long repeats. The default value is 2 for mate-pairs and 1 for long reads (PacBio or Nanopore libraries)"/>
367 <param argument="--local-mis-size" type="integer" value="200" optional="true" label="Minimal local misassembly size" help="Lower threshold for the local misassembly size. Shorter inconsistencies are considered as (long) indels. The default value is 200 bp. Note that the threshold should be equal to or lower than minimal extensive misassembly size, which is 1000 bp by default"/>
368 </section>
369 <section name="advanced" title="Advanced options">
370 <param argument="--contig-thresholds" type="text" value="0,1000" label="Comma-separated list of contig length thresholds (in bp)" help="Used in # contigs ≥ x and total length (≥ x) metrics"/>
371 <param argument="--strict-NA" type="boolean" truevalue="--strict-NA" falsevalue="" checked="false" label="Break contigs at every misassembly event (including local ones) to compute NAx and NGAx statistics?" help="By default, QUAST breaks contigs only at extensive misassemblies (not local ones)."/>
372 <param argument="--extensive-mis-size" type="integer" value="1000" min="85" label="Lower threshold for the relocation size (gap or overlap size between left and right flanking sequence)" help="Shorter relocations are considered as local misassemblies. It does not affect other types of extensive misassemblies (inversions and translocations). The default value is 1000 bp. Note that the threshold should be greater than maximum indel length which is 85 bp."/>
373 <param argument="--scaffold-gap-max-size" type="integer" value="1000" label="Max allowed scaffold gap length difference for detecting corresponding type of misassemblies" help="Longer inconsistencies are considered as relocations and thus, counted as extensive misassemblies. The default value is 10000 bp. Note that the threshold makes sense only if it is greater than extensive misassembly size"/>
374 <param argument="--unaligned-part-size" type="integer" value="500" label="Lower threshold for detecting partially unaligned contigs" help=""/>
375 <param argument="--skip-unaligned-mis-contigs" type="boolean" truevalue="" falsevalue="--skip-unaligned-mis-contigs" checked="true" label="Distinguish contigs with more than 50% unaligned bases as a separate group of contigs?" help="By default, QUAST treats contigs with more than 50% unaligned bases as a separate group and does not count misassemblies in them. Disabling this passes --skip-unaligned-mis-contigs, so these contigs are no longer distinguished."/>
376 <param argument="--fragmented-max-indent" type="integer" min="0" value="" optional="true" label="Fragment max indent" help="Mark translocation as fake if both alignments are located no further than N bases from the ends of the reference fragments. The value should be less than extensive misassembly size. Default value is 50. Note: requires --fragmented option" />
377 <param argument="--report-all-metrics" type="boolean" truevalue="--report-all-metrics" falsevalue="" checked="false" label="Report all metrics" help="Keep all quality metrics in the main report. Usually, all not-relevant metrics are not included in the report, e.g., reference-based metrics in the no-reference mode. Also, if metric values are undefined for all input assemblies, the metric is removed from the report" />
378 <param argument="--x-for-Nx" type="integer" min="0" max="100" value="90" label="Report Nx, Lx, etc metrics for specific value of 'x'" help="Value of 'x' for Nx, Lx, NGx, NGAx, etc metrics reported in addition to N50, L50, NG50, NGA50, etc" />
379 </section>
380 <param name="output_files" type="select" display="checkboxes" optional="true" multiple="true" label="Output files">
381 <option value="html" selected="true">HTML reports</option>
382 <option value="pdf">PDF reports</option>
383 <option value="tabular">Tabular reports</option>
384 <option value="log">Log file</option>
385 <option value="summary">Key metric summary (metagenome mode)</option>
386 <option value="krona">Krona charts (metagenome mode without reference genomes)</option>
387 </param>
388 </inputs>
389 <outputs>
390 <data name="report_tabular" format="tabular" label="${tool.name} on ${on_string}: tabular report" from_work_dir="outputdir/report.tsv">
391 <filter>assembly['type'] == 'genome' and 'tabular' in output_files</filter>
392 </data>
393 <data name="report_tabular_meta" format="tabular" label="${tool.name} on ${on_string}: tabular report for combined reference genome" from_work_dir="outputdir/combined_reference/report.tsv">
394 <filter>assembly['type'] == 'metagenome' and 'tabular' in output_files</filter>
395 </data>
396 <data name="report_html" format="html" label="${tool.name} on ${on_string}: HTML report" from_work_dir="outputdir/report.html">
397 <filter>assembly['type'] == 'genome' and 'html' in output_files</filter>
398 </data>
399 <data name="report_html_meta" format="html" label="${tool.name} on ${on_string}: HTML report for combined reference genome" from_work_dir="outputdir/combined_reference/report.html">
400 <filter>assembly['type'] == 'metagenome' and 'html' in output_files</filter>
401 </data>
402 <data name="report_pdf" format="pdf" label="${tool.name} on ${on_string}: PDF report" from_work_dir="outputdir/report.pdf">
403 <filter>assembly['type'] == 'genome' and 'pdf' in output_files</filter>
404 </data>
405 <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="outputdir/quast.log">
406 <filter>assembly['type'] == 'genome' and 'log' in output_files</filter>
407 </data>
408 <data name="log_meta" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="outputdir/metaquast.log">
409 <filter>assembly['type'] == 'metagenome' and 'log' in output_files</filter>
410 </data>
411 <data name="mis_ass" format="tabular" label="${tool.name} on ${on_string}: Misassemblies report" from_work_dir="outputdir/contigs_reports/misassemblies_report.txt">
412 <filter>assembly['type'] == 'genome' and assembly['ref']['use_ref'] == 'true'</filter>
413 <filter>'tabular' in output_files</filter>
414 </data>
415 <data name="unalign" format="tabular" label="${tool.name} on ${on_string}: Unaligned contigs report" from_work_dir="outputdir/contigs_reports/unaligned_report.tsv">
416 <filter>assembly['type'] == 'genome' and assembly['ref']['use_ref'] == 'true'</filter>
417 <filter>'tabular' in output_files</filter>
418 </data>
419 <data name="kmers" format="tabular" label="${tool.name} on ${on_string}: K-mer-based metrics report" from_work_dir="outputdir/k_mer_stats/kmers_report.txt">
420 <filter>assembly['type'] == 'genome' and assembly['ref']['use_ref'] == 'true' and assembly['ref']['k_mer']['k_mer_stats'] != ''</filter>
421 <filter>'tabular' in output_files</filter>
422 </data>
423 <data name="circos_output" format="png" from_work_dir="outputdir/circos/circos.png" label="${tool.name} on ${on_string}: Circos plot">
424 <filter>assembly['type'] == 'genome' and assembly['ref']['use_ref'] == 'true' and assembly['ref']['circos']</filter>
425 </data>
426 <collection name="metrics_tabular" type="list" label="${tool.name} on ${on_string}: Tabular reports for key metrics" >
427 <discover_datasets pattern="(?P&lt;designation&gt;.+).tsv" directory="outputdir/summary/TSV/" format="tabular"/>
428 <filter>assembly['type'] == 'metagenome' and 'summary' in output_files</filter>
429 </collection>
430 <collection name="metrics_pdf" type="list" label="${tool.name} on ${on_string}: PDF reports for key metrics" >
431 <discover_datasets pattern="(?P&lt;designation&gt;.+).pdf" directory="outputdir/summary/PDF/" format="pdf"/>
432 <filter>assembly['type'] == 'metagenome' and 'summary' in output_files</filter>
433 </collection>
434 <data name="krona" format="html" label="${tool.name} on ${on_string}: Krona chart" from_work_dir="outputdir/krona_charts/*.html">
435 <filter>assembly['type'] == 'metagenome' and assembly['ref']['origin'] == 'none' and 'krona' in output_files</filter>
436 </data>
437 </outputs>
438 <tests>
439 <!-- Test 01: reference, genes annotations and operon coordinates -->
440 <test expect_num_outputs="2">
441 <conditional name="in">
442 <param name="custom" value="true"/>
443 <repeat name="inputs">
444 <param name="input" value="contigs1.fna"/>
445 <param name="labels" value="contig1"/>
446 </repeat>
447 <repeat name="inputs">
448 <param name="input" value="contigs2.fna"/>
449 <param name="labels" value="contig2"/>
450 </repeat>
451 </conditional>
452 <conditional name="assembly">
453 <param name="type" value="genome"/>
454 <conditional name="ref">
455 <param name="use_ref" value="true"/>
456 <conditional name="reference_source">
457 <param name="reference_source_selector" value="history"/>
458 </conditional>
459 <param name="r" value="reference.fna"/>
460 <param name="features" value="genes.gff"/>
461 <param name="operons" value="operons.bed"/>
462 <conditional name="k_mer">
463 <param name="k_mer_stats" value="--k-mer-stats"/>
464 <param name="k_mer_size" value="101" />
465 </conditional>
466 <param name="circos" value="true"/>
467 </conditional>
468 <param name="orga_type" value=""/>
469 </conditional>
470 <param name="min_contig" value="500"/>
471 <param name="split_scaffolds" value="false"/>
472 <section name="genes">
473 <conditional name="gene_finding">
474 <param name="tool" value="--gene_finding"/>
475 <param name="gene_thresholds" value="0,300,1500,3000"/>
476 </conditional>
477 <param name="rna_finding" value="true"/>
478 <param name="conserved_genes_finding" value="true"/>
479 <param name="min_identity" value="95.0"/>
480 </section>
481 <section name="alignments">
482 <param name="use_all_alignments" value="true"/>
483 <param name="min_alignment" value="65"/>
484 <param name="ambiguity_usage" value="one"/>
485 <param name="ambiguity_score" value="0.99"/>
486 </section>
487 <section name="advanced">
488 <param name="contig_thresholds" value="0,1000"/>
489 <param name="strict_NA" value="true"/>
490 <param name="extensive_mis_size" value="1000"/>
491 <param name="scaffold_gap_max_size" value="1000"/>
492 <param name="unaligned_part_size" value="500"/>
493 <param name="skip_unaligned_mis_contigs" value="true"/>
494 <param name="fragmented_max_indent" value="50"/>
495 </section>
496 <param name="output_files" value="html"/>
497 <output name="report_html" file="test1_report.html" ftype="html" compare="sim_size"/>
498 <output name="circos_output" file="test1_circos.png" ftype="png" compare="sim_size"/>
499 </test>
500 <!-- Test 02: all outputs -->
501 <test expect_num_outputs="8">
502 <conditional name="in">
503 <param name="custom" value="true"/>
504 <repeat name="inputs">
505 <param name="input" value="contigs1.fna"/>
506 <param name="labels" value="contig1"/>
507 </repeat>
508 <repeat name="inputs">
509 <param name="input" value="contigs2.fna"/>
510 <param name="labels" value="contig2"/>
511 </repeat>
512 </conditional>
513 <conditional name="assembly">
514 <param name="type" value="genome"/>
515 <conditional name="ref">
516 <param name="use_ref" value="true"/>
517 <conditional name="reference_source">
518 <param name="reference_source_selector" value="cached"/>
519 </conditional>
520 <param name="r" value="reference"/>
521 <param name="features" value="genes.gff"/>
522 <param name="operons" value="operons.bed"/>
523 <conditional name="k_mer">
524 <param name="k_mer_stats" value="--k-mer-stats"/>
525 <param name="k_mer_size" value="101" />
526 </conditional>
527 <param name="circos" value="true"/>
528 </conditional>
529 </conditional>
530 <param name="output_files" value="html,pdf,tabular,log"/>
531 <output name="report_html" file="test2_report.html" ftype="html" compare="sim_size"/>
532 <output name="report_pdf" file="test2_report.pdf" ftype="pdf" compare="sim_size"/>
533 <output name="report_tabular" file="test2_report.tab" ftype="tabular"/>
534 <output name="log" file="test2.log" ftype="txt" compare="sim_size"/>
535 <output name="mis_ass" file="test2_missasemblies.tab" ftype="tabular"/>
536 <output name="unalign" file="test2_unaligned.tab" ftype="tabular"/>
537 <output name="kmers" file="test2_kmers.tab" ftype="tabular"/>
538 <output name="circos_output" file="test2_circos.png" ftype="png" compare="sim_size"/>
539 </test>
540 <!-- Test 03: without reference -->
541 <test expect_num_outputs="3">
542 <conditional name="in">
543 <param name="custom" value="false"/>
544 <param name="inputs" value="contigs1.fna,contigs2.fna"/>
545 </conditional>
546 <conditional name="assembly">
547 <param name="type" value="genome"/>
548 <conditional name="ref">
549 <param name="use_ref" value="false"/>
550 </conditional>
551 <param name="orga_type" value="--eukaryote"/>
552 <param name="min_identity" value="95.0"/>
553 </conditional>
554 <param name="min_contig" value="500"/>
555 <param name="split_scaffolds" value="false"/>
556 <param name="large" value="false"/>
557 <section name="genes">
558 <conditional name="gene_finding">
559 <param name="tool" value="none"/>
560 </conditional>
561 <param name="rna_finding" value="false"/>
562 <param name="conserved_genes_finding" value="false"/>
563 </section>
564 <section name="alignments">
565 <param name="use_all_alignments" value="false"/>
566 <param name="min_alignment" value="65"/>
567 <param name="ambiguity_usage" value="one"/>
568 <param name="ambiguity_score" value="0.99"/>
569 <param name="fragmented" value="false"/>
570 </section>
571 <section name="advanced">
572 <param name="contig_thresholds" value="0,1000, 500"/>
573 <param name="strict_NA" value="false"/>
574 <param name="extensive_mis_size" value="1000"/>
575 <param name="scaffold_gap_max_size" value="1000"/>
576 <param name="unaligned_part_size" value="500"/>
577 <param name="skip_unaligned_mis_contigs" value="-"/>
578 </section>
579 <param name="output_files" value="html,pdf,log" />
580 <output name="log" file="test3.log" ftype="txt" compare="sim_size"/>
581 <output name="report_html" file="test3_report.html" compare="sim_size"/>
582 <output name="report_pdf" file="test3_report.pdf" compare="sim_size"/>
583 </test>
584 <!-- Test 04: metagenomics -->
585 <test expect_num_outputs="3">
586 <conditional name="in">
587 <param name="custom" value="false"/>
588 <param name="inputs" value="contigs3.fasta"/>
589 </conditional>
590 <conditional name="assembly">
591 <param name="type" value="metagenome"/>
592 <conditional name="ref">
593 <param name="origin" value="none"/>
594 </conditional>
595 </conditional>
596 <param name="min_contig" value="500"/>
597 <param name="split_scaffolds" value="false"/>
598 <param name="large" value="false"/>
599 <section name="genes">
600 <conditional name="gene_finding">
601 <param name="tool" value="--mgm"/>
602 </conditional>
603 <param name="rna_finding" value="false"/>
604 <param name="conserved_genes_finding" value="false"/>
605 </section>
606 <section name="alignments">
607 <param name="use_all_alignments" value="false"/>
608 <param name="min_alignment" value="65"/>
609 <param name="min_identity" value="95.0"/>
610 <param name="ambiguity_usage" value="one"/>
611 <param name="ambiguity_score" value="0.99"/>
612 <param name="fragmented" value="false"/>
613 </section>
614 <section name="advanced">
615 <param name="contig_thresholds" value="0,1000, 500"/>
616 <param name="strict_NA" value="false"/>
617 <param name="extensive_mis_size" value="1000"/>
618 <param name="scaffold_gap_max_size" value="1000"/>
619 <param name="unaligned_part_size" value="500"/>
620 <param name="skip_unaligned_mis_contigs" value="-"/>
621 </section>
622 <param name="output_files" value="log,html,tabular"/>
623 <output name="log_meta" ftype="txt">
624 <assert_contents>
625 <has_text text="Reference genomes are not found" />
626 </assert_contents>
627 </output>
628 <output name="report_tabular_meta" ftype="tabular">
629 <assert_contents>
630 <has_text text="# contigs (>= 0 bp)"/>
631 <has_text text="contigs3_fasta"/>
632 <has_text text="# N's per 100 kbp"/>
633 <has_n_lines n="17"/>
634 </assert_contents>
635 </output>
636 <output name="report_html_meta" ftype="html">
637 <assert_contents>
638 <has_text text="Quality Assessment Tool for Genome Assemblies" />
639 <has_text text="contigs3_fasta" />
640 <has_text text="Statistics without reference" />
641 </assert_contents>
642 </output>
643 </test>
644 <!-- Test 05: FASTQ read files -->
645 <test expect_num_outputs="3">
646 <conditional name="in">
647 <param name="custom" value="true"/>
648 <repeat name="inputs">
649 <param name="input" value="contigs1.fna"/>
650 <param name="labels" value="contig1"/>
651 </repeat>
652 <repeat name="inputs">
653 <param name="input" value="contigs2.fna"/>
654 <param name="labels" value="contig2"/>
655 </repeat>
656 </conditional>
657 <conditional name="reads">
658 <param name="reads_option" value="pacbio"/>
659 <param name="input_1" value="pacbio_01.fastq,pacbio_02.fastq,pacbio_03.fastq,pacbio_04.fastq"/>
660 </conditional>
661 <conditional name="assembly">
662 <param name="type" value="genome"/>
663 <conditional name="ref">
664 <param name="use_ref" value="true"/>
665 <conditional name="reference_source">
666 <param name="reference_source_selector" value="history"/>
667 </conditional>
668 <param name="r" value="reference.fna"/>
669 </conditional>
670 </conditional>
671 <section name="alignments">
672 <param name="upper_bound_assembly" value="true"/>
673 <param name="upper_bound_min_con" value="1"/>
674 </section>
675 <param name="output_files" value="tabular"/>
676 <output name="report_tabular" file="test5.tab" ftype="tabular"/>
677 <output name="mis_ass" ftype="tabular">
678 <assert_contents>
679 <has_text text="All statistics are based on contigs of size >= 500 bp"/>
680 <has_text text="# scaffold misassemblies"/>
681 <has_text text="contig1"/>
682 </assert_contents>
683 </output>
684 <output name="unalign" ftype="tabular">
685 <assert_contents>
686 <has_text text="Fully unaligned length"/>
687 <has_text text="contig1"/>
688 </assert_contents>
689 </output>
690 </test>
691 <!-- Test 06: FASTQ.gz read files -->
692 <test expect_num_outputs="1">
693 <conditional name="in">
694 <param name="custom" value="true"/>
695 <repeat name="inputs">
696 <param name="input" value="contigs1.fna"/>
697 <param name="labels" value="contig1"/>
698 </repeat>
699 <repeat name="inputs">
700 <param name="input" value="contigs2.fna"/>
701 <param name="labels" value="contig2"/>
702 </repeat>
703 </conditional>
704 <conditional name="reads">
705 <param name="reads_option" value="single"/>
706 <param name="input_1" value="pacbio_01.fastq.gz,pacbio_02.fastq.gz"/>
707 </conditional>
708 <param name="output_files" value="tabular"/>
709 <output name="report_tabular" file="test6.tab" ftype="tabular"/>
710 </test>
711 <!-- Test 07: FASTA.gz read files -->
712 <test expect_num_outputs="1">
713 <conditional name="in">
714 <param name="custom" value="true"/>
715 <repeat name="inputs">
716 <param name="input" value="contigs1.fna"/>
717 <param name="labels" value="contig1"/>
718 </repeat>
719 <repeat name="inputs">
720 <param name="input" value="contigs2.fna"/>
721 <param name="labels" value="contig2"/>
722 </repeat>
723 </conditional>
724 <conditional name="reads">
725 <param name="reads_option" value="single"/>
726 <param name="input_1" value="pacbio_01.fasta.gz,pacbio_02.fasta.gz"/>
727 </conditional>
728 <param name="output_files" value="tabular"/>
729 <output name="report_tabular" file="test7.tab" ftype="tabular"/>
730 </test>
731 <!-- Test 08: metagenomics all tab outputs-->
732 <test expect_num_outputs="3">
733 <conditional name="in">
734 <repeat name="inputs">
735 <param name="input" value="meta_contigs_1.fasta"/>
736 <param name="labels" value="meta_contigs_1"/>
737 </repeat>
738 <repeat name="inputs">
739 <param name="input" value="meta_contigs_2.fasta"/>
740 <param name="labels" value="meta_contigs_2"/>
741 </repeat>
742 </conditional>
743 <conditional name="assembly">
744 <param name="type" value="metagenome"/>
745 <conditional name="ref">
746 <param name="origin" value="history"/>
747 <param name="r" value="meta_ref_1.fasta,meta_ref_2.fasta,meta_ref_3.fasta"/>
748 </conditional>
749 <param name="min_identity" value="95.0"/>
750 </conditional>
751 <param name="min_contig" value="500"/>
752 <param name="split_scaffolds" value="false"/>
753 <param name="large" value="false"/>
754 <section name="genes">
755 <conditional name="gene_finding">
756 <param name="tool" value="none"/>
757 </conditional>
758 <param name="rna_finding" value="false"/>
759 <param name="conserved_genes_finding" value="false"/>
760 </section>
761 <section name="alignments">
762 <param name="use_all_alignments" value="false"/>
763 <param name="min_alignment" value="65"/>
764 <param name="ambiguity_usage" value="one"/>
765 <param name="ambiguity_score" value="0.99"/>
766 <param name="fragmented" value="false"/>
767 </section>
768 <section name="advanced">
769 <param name="contig_thresholds" value="0,1000"/>
770 <param name="strict_NA" value="false"/>
771 <param name="extensive_mis_size" value="1000"/>
772 <param name="scaffold_gap_max_size" value="1000"/>
773 <param name="unaligned_part_size" value="500"/>
774 <param name="skip_unaligned_mis_contigs" value="-"/>
775 </section>
776 <param name="output_files" value="tabular,summary"/>
777 <output name="report_tabular_meta" ftype="tabular">
778 <assert_contents>
779 <has_text text="# contigs (>= 0 bp)"/>
780 <has_text text="meta_ref_3_fasta"/>
781 <has_text text="# N's per 100 kbp"/>
782 <has_n_lines n="34"/>
783 </assert_contents>
784 </output>
785 <output_collection name="metrics_tabular" type="list" count="15"/>
786 <output_collection name="metrics_pdf" type="list" count="16"/>
787 </test>
788 <!-- Test 09: metagenomics log, html and krona outputs-->
789 <test expect_num_outputs="2">
790 <conditional name="in">
791 <repeat name="inputs">
792 <param name="input" value="meta_contigs_1.fasta"/>
793 <param name="labels" value="meta_contigs_1"/>
794 </repeat>
795 <repeat name="inputs">
796 <param name="input" value="meta_contigs_2.fasta"/>
797 <param name="labels" value="meta_contigs_2"/>
798 </repeat>
799 </conditional>
800 <conditional name="assembly">
801 <param name="type" value="metagenome"/>
802 <conditional name="ref">
803 <param name="origin" value="list"/>
804 <param name="references_list" value="Lactobacillus_delbrueckii_bulgaricus,Lactobacillus_reuteri"/>
805 </conditional>
806 <param name="min_identity" value="95.0"/>
807 </conditional>
808 <param name="min_contig" value="500"/>
809 <param name="split_scaffolds" value="false"/>
810 <param name="large" value="false"/>
811 <section name="genes">
812 <conditional name="gene_finding">
813 <param name="tool" value="none"/>
814 </conditional>
815 <param name="rna_finding" value="false"/>
816 <param name="conserved_genes_finding" value="false"/>
817 </section>
818 <section name="alignments">
819 <param name="use_all_alignments" value="false"/>
820 <param name="min_alignment" value="65"/>
821 <param name="ambiguity_usage" value="all"/>
822 <param name="ambiguity_score" value="0.99"/>
823 <param name="fragmented" value="false"/>
824 </section>
825 <section name="advanced">
826 <param name="contig_thresholds" value="0,1000"/>
827 <param name="strict_NA" value="false"/>
828 <param name="extensive_mis_size" value="1000"/>
829 <param name="scaffold_gap_max_size" value="1000"/>
830 <param name="unaligned_part_size" value="500"/>
831 <param name="skip_unaligned_mis_contigs" value="-"/>
832 </section>
833 <param name="output_files" value="html,log"/>
834 <output name="report_html_meta" ftype="html">
835 <assert_contents>
836 <has_text text="meta_contigs_2_fasta" />
837 <has_text text="combined_reference" />
838 <has_text text="Lactobacillus" />
839 </assert_contents>
840 </output>
841 <output name="log_meta" ftype="txt">
842 <assert_contents>
843 <has_text text="List of references was provided, starting to download reference genomes from NCBI" />
844 <has_text text="Lactobacillus_delbrueckii_bulgaricus" />
845 </assert_contents>
846 </output>
847 </test>
848 <!-- Test 10: Test new options -->
849 <test expect_num_outputs="1">
850 <conditional name="in">
851 <param name="custom" value="true"/>
852 <repeat name="inputs">
853 <param name="input" value="contigs1.fna"/>
854 <param name="labels" value="contig1"/>
855 </repeat>
856 <repeat name="inputs">
857 <param name="input" value="contigs2.fna"/>
858 <param name="labels" value="contig2"/>
859 </repeat>
860 </conditional>
861 <section name="alignments">
862 <param name="local_mis_size" value="210"/>
863 </section>
864 <conditional name="assembly">
865 <param name="type" value="genome"/>
866 <conditional name="ref">
867 <param name="use_ref" value="false"/>
868 </conditional>
869 </conditional>
870 <section name="advanced">
871 <param name="report_all_metrics" value="true"/>
872 <param name="x_for_Nx" value="80"/>
873 </section>
874 <param name="output_files" value="tabular"/>
875 <output name="report_tabular" file="test10_tabular_report.tab" ftype="tabular"/>
876 </test>
877 <!-- Test 11: Test paired fastq.gz inputs -->
878 <test expect_num_outputs="1">
879 <conditional name="in">
880 <param name="custom" value="true"/>
881 <repeat name="inputs">
882 <param name="input" value="contigs1.fna"/>
883 <param name="labels" value="contig1"/>
884 </repeat>
885 </conditional>
886 <conditional name="reads">
887 <param name="reads_option" value="paired"/>
888 <param name="input_1" value="reads1.fastq.gz" ftype="fastqsanger.gz"/>
889 <param name="input_2" value="reads2.fastq.gz" ftype="fastqsanger.gz"/>
890 </conditional>
891 <conditional name="assembly">
892 <param name="type" value="genome"/>
893 <conditional name="ref">
894 <param name="use_ref" value="false"/>
895 </conditional>
896 </conditional>
897 <param name="output_files" value="tabular"/>
898 <output name="report_tabular" ftype="tabular">
899 <assert_contents>
900 <has_text text="# contigs (>= 0 bp)"/>
901 <has_text text="contig1"/>
902 <has_text text="# N's per 100 kbp"/>
903 <has_n_lines n="22"/>
904 </assert_contents>
905 </output>
906 </test>
907 <!-- Test 12: Test paired-collection fastq.gz inputs -->
908 <test expect_num_outputs="1">
909 <conditional name="in">
910 <param name="custom" value="true"/>
911 <repeat name="inputs">
912 <param name="input" value="contigs1.fna"/>
913 <param name="labels" value="contig1"/>
914 </repeat>
915 </conditional>
916 <conditional name="reads">
917 <param name="reads_option" value="paired_collection"/>
918 <param name="input_1">
919 <collection type="paired">
920 <element name="forward" value="reads1.fastq.gz" ftype="fastqsanger.gz"/>
921 <element name="reverse" value="reads2.fastq.gz" ftype="fastqsanger.gz"/>
922 </collection>
923 </param>
924 </conditional>
925 <conditional name="assembly">
926 <param name="type" value="genome"/>
927 <conditional name="ref">
928 <param name="use_ref" value="false"/>
929 </conditional>
930 </conditional>
931 <param name="output_files" value="tabular"/>
932 <output name="report_tabular" ftype="tabular">
933 <assert_contents>
934 <has_text text="# contigs (>= 0 bp)"/>
935 <has_text text="contig1"/>
936 <has_text text="# N's per 100 kbp"/>
937 <has_n_lines n="15"/>
938 </assert_contents>
939 </output>
940 </test>
941 </tests>
942 <help>
943 <![CDATA[
944 **What it does**
945
946 QUAST = QUality ASsessment Tool. The tool evaluates genome assemblies by computing various metrics.
947
948 If you have one or multiple genome assemblies, you can assess their quality with Quast. It works with or without a reference genome. If you are new to Quast, start by reading its `manual page <http://quast.sourceforge.net/docs/manual.html>`_.
949
950 **Using Quast without reference**
951
952 Without a reference, Quast can calculate a number of assembly-related metrics but cannot provide any information about potential misassemblies, inversions, translocations, etc. Suppose you have three assemblies produced by Unicycler corresponding to three different antibiotic treatments *car*, *pit*, and *cef* (these stand for carbenicillin, piperacillin, and cefsulodin, respectively). Evaluating them without a reference will produce the following Quast outputs:
953
954 * Quast report in HTML format
955 * `Contig viewer <http://quast.sourceforge.net/docs/manual.html#sec3.4>`_ (an HTML file)
956 * `Quast report <http://quast.sourceforge.net/docs/manual.html#sec3.1.1>`_ in Tab-delimited format
957 * Quast log (a file with technical information about Quast tool execution)
958
959 The **tab delimited Quast report** will contain the following information::
960
961 Assembly pit_fna cef_fna car_fna
962 # contigs (>= 0 bp) 100 91 94
963 # contigs (>= 1000 bp) 62 58 61
964 Total length (>= 0 bp) 6480635 6481216 6480271
965 Total length (>= 1000 bp) 6466917 6468946 6467103
966 # contigs 71 66 70
967 Largest contig 848753 848766 662053
968 Total length 6473173 6474698 6473810
969 GC (%) 66.33 66.33 66.33
970 N50 270269 289027 254671
971 N75 136321 136321 146521
972 L50 7 7 8
973 L75 15 15 16
974 # N's per 100 kbp 0.00 0.00 0.00
975
976 where values are defined as specified in `Quast manual <http://quast.sourceforge.net/docs/manual.html#sec3.1.1>`_
977
978 **Quast report in HTML format** contains graphs in addition to the above metrics, while **Contig viewer** draws contigs ordered from longest to shortest. This ordering is suitable for comparing only the largest contigs or the number of contigs longer than a specific threshold. The viewer shows N50 and N75 with color and textual indication. If the reference genome is available or at least the approximate genome length is known (see ``--est-ref-size``), NG50 and NG75 are also shown. You can also tone down contigs shorter than a specified threshold using the Icarus control panel:
979
980 .. image:: $PATH_TO_IMAGES/contig_view_noR.png
981 :width: 558
982 :height: 412
983
984 Also see `Plot description <http://quast.sourceforge.net/docs/manual.html#sec2>`_ section of the manual.
985
986 **Using Quast with reference**
987
988 Car, pit, and cef are in fact assemblies of *Pseudomonas aeruginosa* UCBPP-PA14, so we can use its genome as a reference (by supplying a Fasta file containing *P. aeruginosa* pa14 genome to **Reference genome** input box). The following outputs will be produced (note the alignment viewer):
989
990 * Quast report in HTML format
991 * `Contig viewer <http://quast.sourceforge.net/docs/manual.html#sec3.4>`_ (an HTML file)
992 * `Alignment viewer <http://quast.sourceforge.net/docs/manual.html#sec3.4>`_ (an HTML file)
993 * `Quast report <http://quast.sourceforge.net/docs/manual.html#sec3.1.1>`_ in Tab-delimited format
994 * Summary of `misassemblies <http://quast.sourceforge.net/docs/manual.html#sec3.1.2>`_
995 * Summary of `unaligned contigs <http://quast.sourceforge.net/docs/manual.html#sec3.1.3>`_
996 * Quast log (a file with technical information about Quast tool execution)
997
998 With the reference Quast produces a much more comprehensive set of results::
999
1000 Assembly pit_fna cef_fna car_fna
1001 # contigs (>= 0 bp) 100 91 94
1002 # contigs (>= 1000 bp) 62 58 61
1003 Total length (>= 0 bp) 6480635 6481216 6480271
1004 Total length (>= 1000 bp) 6466917 6468946 6467103
1005 # contigs 71 66 70
1006 Largest contig 848753 848766 662053
1007 Total length 6473173 6474698 6473810
1008 Reference length 6537648 6537648 6537648
1009 GC (%) 66.33 66.33 66.33
1010 Reference GC (%) 66.29 66.29 66.29
1011 N50 270269 289027 254671
1012 NG50 270269 289027 254671
1013 N75 136321 136321 146521
1014 NG75 136321 136321 136321
1015 L50 7 7 8
1016 LG50 7 7 8
1017 L75 15 15 16
1018 LG75 15 15 17
1019 # misassemblies 0 0 0
1020 # misassembled contigs 0 0 0
1021 Misassembled contigs length 0 0 0
1022 # local misassemblies 1 1 2
1023 # unaligned mis. contigs 0 0 0
1024 # unaligned contigs 0 + 0 0 + 0 0 + 0
1025 part part part
1026 Unaligned length 0 0 0
1027 Genome fraction (%) 99.015 99.038 99.025
1028 Duplication ratio 1.000 1.000 1.000
1029 # N's per 100 kbp 0.00 0.00 0.00
1030 # mismatches per 100 kbp 3.82 3.63 3.49
1031 # indels per 100 kbp 1.19 1.13 1.13
1032 Largest alignment 848753 848766 662053
1033 Total aligned length 6473163 6474660 6473792
1034 NA50 270269 289027 254671
1035 NGA50 270269 289027 254671
1036 NA75 136321 136321 146521
1037 NGA75 136321 136321 136321
1038 LA50 7 7 8
1039 LGA50 7 7 8
1040 LA75 15 15 16
1041 LGA75 15 15 17
1042
1043 where, again, values are defined as specified in `Quast manual <http://quast.sourceforge.net/docs/manual.html#sec3.1.1>`_. You can see that this report includes a variety of data that can only be computed against a reference assembly.
1044
1045 Using reference also produces an **Alignment viewer**:
1046
1047 .. image:: $PATH_TO_IMAGES/Align_view.png
1048 :width: 515
1049 :height: 395
1050
1051 Alignment viewer highlights regions of interest such as, in this case, misassemblies that can potentially point to genome rearrangements (see more `here <http://quast.sourceforge.net/docs/manual.html#sec3.4>`_).
1052
1053 ]]>
1054 </help>
1055 <expand macro="citations"/>
1056 </tool>