comparison quast.xml @ 4:0834c823d4b9 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/quast commit 2c56b5f07500507dad503aab6ec0619ec37f8b87
author iuc
date Mon, 12 Feb 2018 09:05:20 -0500
parents 6fcbee531de6
children 81df4950d65b
comparison
equal deleted inserted replaced
3:6fcbee531de6 4:0834c823d4b9
1 <tool id="quast" name="Quast" version="4.5" > 1 <tool id="quast" name="Quast" version="4.6.3" >
2 <description>Genome assembly Quality</description> 2 <description>Genome assembly Quality</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="4.5">quast</requirement> 4 <requirement type="package" version="4.6.3">quast</requirement>
5 </requirements> 5 </requirements>
6 <stdio> 6 <stdio>
7 <regex match="ERROR! exception caught!" 7 <regex match="ERROR! exception caught!"
8 source="both" 8 source="both"
9 level="fatal" 9 level="fatal"
32 #if $input_size: 32 #if $input_size:
33 --est-ref-size $input_size 33 --est-ref-size $input_size
34 #end if 34 #end if
35 --min-contig $min_contig 35 --min-contig $min_contig
36 -l 36 -l
37 #set names = ','.join( ['"'+ re.sub('[^\w\-_]', '_', str( $x.input.element_identifier))+'"' for $x in $files ]) 37 #set names = ','.join( ['"'+ re.sub('[^\w\-_]', '_', str( $x.element_identifier))+'"' for $x in $input ])
38 38
39 $names 39 $names
40 --contig-thresholds $threshold_contig 40 --contig-thresholds $threshold_contig
41 #for $k in $files: 41 #for $k in $input:
42 #if $k.type_file == "scaffold": 42 $k
43 -s
44 #end if
45 $k.input
46 #end for 43 #end for
47 && 44 &&
48 mv outputdir/report.txt '$quast_out' &&
49 mv outputdir/report.tsv '$quast_tsv' && 45 mv outputdir/report.tsv '$quast_tsv' &&
50 mv outputdir/report.tex '$quast_tex' && 46 ## The sed string below removes non-functional "Main menu" button from the viewer
51 mv outputdir/icarus_viewers/contig_size_viewer.html '$icarus' && 47 sed '\:<div class="btn btn-inverse" id="to_main_menu_button">:,\:</div>:d' outputdir/icarus_viewers/contig_size_viewer.html > '$contig_size_viewer' &&
52 mv outputdir/report.html '$report_html' 48 #if $input_ref:
49 ## The sed string below removes non-functional "Main menu" button from the viewer
50 sed '\:<div class="btn btn-inverse" id="to_main_menu_button">:,\:</div>:d' outputdir/icarus_viewers/alignment_viewer.html > '$alignment_viewer' &&
51 #end if
52 ## The sed string below removes non-functional link from the report page
53 sed "\:<div id='icarus-json'>:,\:</div>:d" outputdir/report.html > '$report_html' &&
54 mv outputdir/report.pdf '$report_pdf' &&
55 mv outputdir/contigs_reports/misassemblies_report.tsv '$mis_ass_tsv' &&
56 mv outputdir/contigs_reports/unaligned_report.tsv '$unalign_tsv' &&
57 mv outputdir/quast.log '$log_txt'
53 ]]> 58 ]]>
54 </command> 59 </command>
55 <inputs> 60 <inputs>
56 <repeat name="files" title="Input assembly files" min="1"> 61 <param type="data" format="fasta" name="input" label="Contigs/scaffolds output file" multiple="True"/>
57 <param type="data" format="fasta" name="input" label="Contigs/scaffolds output file"/> 62 <param name="type_file" type="select" label="Type of data">
58 <param name="type_file" type="select" label="Type of data"> 63 <option value="contig">Contig</option>
59 <option value="contig">Contig</option> 64 <option value="scaffold">Scaffold</option>
60 <option value="scaffold">Scaffold</option> 65 </param>
61 </param>
62 </repeat>
63 <param name="input_size" type="integer" label="Size of reference genome" optional="True" argument="--est-ref-size" 66 <param name="input_size" type="integer" label="Size of reference genome" optional="True" argument="--est-ref-size"
64 help="Estimated reference genome size (in bp) for computing NGx statistics, if known. This value will be used only if a reference genome file is not specified "/> 67 help="Estimated reference genome size (in bp) for computing NGx statistics, if known. This value will be used only if a reference genome file is not specified "/>
65 <param name="input_ref" type="data" format="fasta" label="Reference File" optional="True" argument="-R" 68 <param name="input_ref" type="data" format="fasta" label="Reference genome" optional="True" argument="-R"
66 help="Many metrics can't be evaluated without a reference. If this is omitted, QUAST will only report the metrics that can be evaluated without a reference."/> 69 help="Many metrics can't be evaluated without a reference. If this is omitted, QUAST will only report the metrics that can be evaluated without a reference."/>
67 <param name="annot" type="data" format="txt" label="Gene Annotations" optional="True" argument="-G" 70 <param name="annot" type="data" format="gff, gff3, bed" label="Gene Annotations" optional="True" argument="-G"
68 help="File with gene positions in the reference genome. "/> 71 help="Gene coordinates for the reference genome (only relevant if the reference genome is used). "/>
69 <param name="input_operon" type="data" format="txt" label="Operon Annotations" optional="True" argument="-O" 72 <param name="input_operon" type="data" format="gff, gff3, bed" label="Operon Annotations" optional="True" argument="-O" help="Operon coordinates for the reference genome (only relevant if the reference genome is used)."/>
70 help="File with operon positions in the reference genome"/>
71 <param name="gene_selection" type="select" label="Type of organism"> 73 <param name="gene_selection" type="select" label="Type of organism">
72 <option value="prokaryotes">Prokaryotes</option> 74 <option value="prokaryotes">Prokaryotes</option>
73 <option value="eukaryote">Eukaryote</option> 75 <option value="eukaryote">Eukaryote</option>
74 <option value="metagenes">Metagenomes</option>
75 </param> 76 </param>
76 <param name="min_contig" type="integer" value="500" label="Lower Threshold" argument="--min-contig" 77 <param name="min_contig" type="integer" value="500" label="Lower Threshold" argument="--min-contig"
77 help="Set the lower threshold for a contig length. Shorter contigs won't be taken into account (default is 500)"/> 78 help="Set the lower threshold for a contig length. Shorter contigs won't be taken into account [default is 500]"/>
78 <param name="threshold_contig" type="text" value="0,1000" label="Thresholds" argument="--contig-thresholds" 79 <param name="threshold_contig" type="text" value="0,1000" label="Thresholds" argument="--contig-thresholds"
79 help="Set the thresholds for contig length. Comma-separated list of contig length thresholds.(default is 0,1000)"/> 80 help="Set the thresholds for contig length. Comma-separated list of contig length thresholds.[default is 0,1000]"/>
80 </inputs> 81 </inputs>
81 <outputs> 82 <outputs>
82 <data format="txt" name="quast_out" label="Quast report.txt" from_work_dir="Quast_report.txt"/> 83 <data format="txt" name="log_txt" label="Quast: Log" />
83 <data format="tsv" name="quast_tsv" label="Quast report.tsv" from_work_dir="Quast_report.tsv"/> 84 <data format="tabular" name="mis_ass_tsv" label="Quast: Misassemblies">
84 <data format="tex" name="quast_tex" label="Quast report.tex" from_work_dir="Quast_report.tex"/> 85 <filter>input_ref is not None</filter>
85 <data format="html" name="icarus" label="Icarus Contig size viewer" from_work_dir="Icarus.html"/> 86 </data>
86 <data format="html" name="report_html" label="Quast report.html" from_work_dir="Quast_report.html"/> 87 <data format="tabular" name="unalign_tsv" label="Quast: Unaligned contigs">
88 <filter>input_ref is not None</filter>
89 </data>
90 <data format="html" name="contig_size_viewer" label="Quast: Contig view"/>
91 <data format="html" name="alignment_viewer" label="Quast: Alignment view">
92 <filter>input_ref is not None</filter>
93 </data>
94 <data format="tabular" name="quast_tsv" label="Quast: Report (tabular)"/>
95 <data format="html" name="report_html" label="Quast: Report (HTML)"/>
96 <data format="pdf" name="report_pdf" label="Quast: Report (PDF)"/>
87 </outputs> 97 </outputs>
88 <tests> 98 <tests>
89 <test> 99 <test>
90 <param name="input" value="contigs_1.fasta"/> 100 <!-- Test with reference and genes annotations -->
91 <param name="input.type_file" value="contig"/> 101 <param name="input" value="contigs2.fna,contigs1.fna"/>
102 <param name="input_ref" value="reference.fna"/>
103 <param name="type_file" value="contig"/>
104 <param name="annot" value="genes.gff"/>
92 <param name="gene_selection" value="prokaryotes"/> 105 <param name="gene_selection" value="prokaryotes"/>
93 <output name="quast_tsv" file="Quast_report.tsv" lines_diff="4"/> 106 <output name="quast_tsv" file="test1_output.tsv" lines_diff="4"/>
94 </test> 107 </test>
95 <test> 108 <test>
96 <param name="input" value="contigs_1.fasta"/> 109 <!-- Test without reference -->
97 <param name="input.type_file" value="contig"/> 110 <param name="input" value="contigs2.fna,contigs1.fna"/>
98 <param name="input_ref" value="NC000913.3_1-6650.fasta"/> 111 <param name="type_file" value="contig"/>
99 <param name="gene_selection" value="prokaryotes"/> 112 <output name="quast_tsv" file="test2_output.tsv" lines_diff="4"/>
100 <output name="quast_tsv" file="Quast_report_withref.tsv" lines_diff="4"/>
101 </test>
102 <test>
103 <param name="input" value="contigs_1.fasta"/>
104 <param name="input.type_file" value="contig"/>
105 <param name="input_ref" value="NC000913.3_1-6650.fasta"/>
106 <param name="annot" value="NC000913.3_1-6650.gff"/>
107 <param name="gene_selection" value="prokaryotes"/>
108 <output name="quast_tsv" file="Quast_report_withref_withgenes.tsv" lines_diff="4"/>
109 </test> 113 </test>
110 </tests> 114 </tests>
111 <help> 115 <help>
112 <![CDATA[ 116 <![CDATA[
113 **Description** 117 **What it does**
114 118
115 Galaxy tool wrapper for the QUAST tool. Quast stands for QUality ASsessment Tool. It evaluates genome assemblies by computing various metrics. 119 QUAST = QUality ASsessment Tool. The tool evaluates genome assemblies by computing various metrics.
116 120
117 ----- 121 If you have one or multiple genome assemblies, you can assess their quality with Quast. It works with or without a reference genome. If you are new to Quast, start by reading its `manual page <http://quast.bioinf.spbau.ru/manual.html>`_.
118 122
119 **Inputs and Outputs** 123 **Using Quast without reference**
120 124
121 - Input: 125 Without reference Quast can calculate a number of assembly-related metrics but cannot provide any information about potential misassemblies, inversions, translocations, etc. Suppose you have three assemblies produced by Unicycler corresponding to three different antibiotic treatments *car*, *pit*, and *cef* (these stand for carbenicillin, piperacillin, and cefsulodin, respectively). Evaluating them without reference will produce the following Quast outputs:
122 + The tool accepts assemblies and references in FASTA format. 126
123 + The tool accepts annotation and operon files in: 127 * Quast report in HTML format
124 + GFF, versions 2 and 3 (note: feature/type field should be either "gene" or "operon") 128 * `Contig viewer <http://quast.bioinf.spbau.ru/manual.html#sec3.4>`_ (an HTML file)
125 + the format used by NCBI for genes ("Summary (text)"); 129 * `Quast report <http://quast.bioinf.spbau.ru/manual.html#sec3.1.1>`_ in Tab-delimited format
126 + four tab-separated columns: sequence name, gene/operon id, start position, end position 130 * Quast log (a file with technical information about Quast tool execution)
127 131
128 - Output: 132 The **tab delimited Quast report** will contain the following information::
129 + An assessment summary in plain text format 133
130 + An assessment summary in tabulation separated values format 134 Assembly pit_fna cef_fna car_fna
131 + An assessment summary in LateX format 135 # contigs (>= 0 bp) 100 91 94
132 + An assessment summary in HTML format 136 # contigs (>= 1000 bp) 62 58 61
133 + An HTML view of contig sizes wit Icarus 137 Total length (>= 0 bp) 6480635 6481216 6480271
134 138 Total length (>= 1000 bp) 6466917 6468946 6467103
139 # contigs 71 66 70
140 Largest contig 848753 848766 662053
141 Total length 6473173 6474698 6473810
142 GC (%) 66.33 66.33 66.33
143 N50 270269 289027 254671
144 N75 136321 136321 146521
145 L50 7 7 8
146 L75 15 15 16
147 # N's per 100 kbp 0.00 0.00 0.00
148
149 where values are defined as specified in `Quast manual <http://quast.bioinf.spbau.ru/manual.html#sec3.1.1>`_
150
151 **Quast report in HTML format** contains graphs in addition to the above metrics, while **Contig viewer** draws contigs ordered from longest to shortest. This ordering is suitable for comparing only largest contigs or number of contigs longer than a specific threshold. The viewer shows N50 and N75 with color and textual indication. If the reference genome is available or at least approximate genome length is known (see `--est-ref-size`), NG50 and NG75 are also shown. You can also tone down contigs shorter than a specified threshold using Icarus control panel:
152
153 .. image:: $PATH_TO_IMAGES/contig_view_noR.png
154 :width: 558
155 :height: 412
156
157 Also see `Plot description <http://quast.bioinf.spbau.ru/manual.html#sec3.2>`_ section of the manual.
158
159 **Using Quast with reference**
160
161 Car, pit, and cef are in fact assemblies of *Pseudomonas aeruginosa* UCBPP-PA14, so we can use its genome as a reference (by supplying a Fasta file containing *P. aeruginosa* pa14 genome to **Reference genome** input box). The following outputs will be produced (note the alignment viewer):
162
163 * Quast report in HTML format
164 * `Contig viewer <http://quast.bioinf.spbau.ru/manual.html#sec3.4>`_ (an HTML file)
165 * `Alignment viewer <http://quast.bioinf.spbau.ru/manual.html#sec3.4>`_ (an HTML file)
166 * `Quast report <http://quast.bioinf.spbau.ru/manual.html#sec3.1.1>`_ in Tab-delimited format
167 * Summary of `misassemblies <http://quast.bioinf.spbau.ru/manual.html#sec3.1.2>`_
168 * Summary of `unaligned contigs <http://quast.bioinf.spbau.ru/manual.html#sec3.1.3>`_
169 * Quast log (a file with technical information about Quast tool execution)
170
171 With the reference Quast produces a much more comprehensive set of results::
172
173 Assembly pit_fna cef_fna car_fna
174 # contigs (>= 0 bp) 100 91 94
175 # contigs (>= 1000 bp) 62 58 61
176 Total length (>= 0 bp) 6480635 6481216 6480271
177 Total length (>= 1000 bp) 6466917 6468946 6467103
178 # contigs 71 66 70
179 Largest contig 848753 848766 662053
180 Total length 6473173 6474698 6473810
181 Reference length 6537648 6537648 6537648
182 GC (%) 66.33 66.33 66.33
183 Reference GC (%) 66.29 66.29 66.29
184 N50 270269 289027 254671
185 NG50 270269 289027 254671
186 N75 136321 136321 146521
187 NG75 136321 136321 136321
188 L50 7 7 8
189 LG50 7 7 8
190 L75 15 15 16
191 LG75 15 15 17
192 # misassemblies 0 0 0
193 # misassembled contigs 0 0 0
194 Misassembled contigs length 0 0 0
195 # local misassemblies 1 1 2
196 # unaligned mis. contigs 0 0 0
197 # unaligned contigs 0 + 0 0 + 0 0 + 0
198 part part part
199 Unaligned length 0 0 0
200 Genome fraction (%) 99.015 99.038 99.025
201 Duplication ratio 1.000 1.000 1.000
202 # N's per 100 kbp 0.00 0.00 0.00
203 # mismatches per 100 kbp 3.82 3.63 3.49
204 # indels per 100 kbp 1.19 1.13 1.13
205 Largest alignment 848753 848766 662053
206 Total aligned length 6473163 6474660 6473792
207 NA50 270269 289027 254671
208 NGA50 270269 289027 254671
209 NA75 136321 136321 146521
210 NGA75 136321 136321 136321
211 LA50 7 7 8
212 LGA50 7 7 8
213 LA75 15 15 16
214 LGA75 15 15 17
215
216 where, again, values are defined as specified in `Quast manual <http://quast.bioinf.spbau.ru/manual.html#sec3.1.1>`_. You can see that this report includes a variety of data that can only be computed against a reference assembly.
217
218 Using reference also produces an **Alignment viewer**:
219
220 .. image:: $PATH_TO_IMAGES/Align_view.png
221 :width: 515
222 :height: 395
223
224 Alignment viewer highlights regions of interest such as, in this case, misassemblies that can potentially point to genome rearrangements (see more `here <http://quast.bioinf.spbau.ru/manual.html#sec3.4>`_).
135 225
136 ]]> 226 ]]>
137 </help> 227 </help>
138 228
139 <citations> 229 <citations>