Mercurial > repos > iuc > quast

<tool id="quast" name="Quast" version="@TOOL_VERSION@+galaxy0" >
    <description>Genome assembly Quality</description>
    <macros>
        <!-- Version of the wrapped QUAST package; reused by the tool version string and the package requirement. -->
        <token name="@TOOL_VERSION@">5.0.2</token>
        <!-- Gene length threshold parameter, expanded under each gene finding option that accepts thresholds. -->
        <xml name="gene_thresholds">
            <param name="gene_thresholds"
                   argument="--gene-thresholds"
                   type="text"
                   value="0,300,1500,3000"
                   label="Comma-separated list of thresholds (in bp) for gene lengths to find with a finding tool"/>
        </xml>
    </macros>
    <requirements>
        <!-- The package version follows the @TOOL_VERSION@ token so that wrapper and dependency stay in step. -->
        <requirement type="package"
                     version="@TOOL_VERSION@">quast</requirement>
    </requirements>
    <command detect_errors="exit_code">
    <![CDATA[
## Cheetah template that assembles the quast / metaquast command line and then
## copies the HTML reports (plus the Icarus viewers, when present) into the
## extra-files directory of the HTML output.
#import re

## Build the comma-separated value for --labels. Every character that is not
## matched by the class [\w\-_] is replaced with an underscore.
#if str($in.custom) == 'false'
    #set $labels = ','.join( [re.sub('[^\w\-_]', '_', str($x.element_identifier)) for $x in $in.inputs ])
#else
    ## Custom names: fall back to the dataset identifier when a name is left empty.
    #set $labels = []
    #for $x in $in.inputs
        #if str($x.labels) != ''
            #silent $labels.append(re.sub('[^\w\-_]', '_', str($x.labels)))
        #else
            #silent $labels.append(re.sub('[^\w\-_]', '_', str($x.input.element_identifier)))
        #end if
    #end for
    #set $labels = ','.join($labels)
#end if

## Write the user-supplied reference names, one per line, for --references-list.
## NOTE(review): this file is written while the template is rendered, using a
## relative path; confirm that rendering happens in the job working directory.
#if $assembly.type == 'metagenome' and $assembly.ref.origin == 'list'
    #set $temp_ref_list_fp = 'temp_ref_list_fp'
    #set $temp_ref_list_f = open($temp_ref_list_fp, 'w')
    #silent $temp_ref_list_f.write('\n'.join([x.strip() for x in $assembly.ref.references_list.split(',')]))
    #silent $temp_ref_list_f.close()
#end if

#if $assembly.type == 'genome'
quast
#else
metaquast
#end if
    -o outputdir

#if $assembly.type == 'genome'
    #if $assembly.ref.use_ref == 'true'
    -r '$assembly.ref.r'
        #if $assembly.ref.features
    --features '$assembly.ref.features'
        #end if
        #if $assembly.ref.operons
    --operons '$assembly.ref.operons'
        #end if
    #else if $assembly.ref.est_ref_size
    --est-ref-size $assembly.ref.est_ref_size
    #end if
    $assembly.orga_type
#else
    #if $assembly.ref.origin == 'history'
    -r '$assembly.ref.r'
    #else if $assembly.ref.origin == 'list'
    --references-list '$temp_ref_list_fp'
    #else if $assembly.ref.origin == 'silva'
    --test-no-ref
    --max-ref-num '$assembly.ref.max_ref_num'
    #end if
#end if
    --min-contig $min_contig
    --threads \${GALAXY_SLOTS:-1}
    $split_scaffolds
    --labels '$labels'
    $large
    $k_mer.k_mer_stats
#if str($k_mer.k_mer_stats) != ''
    --k-mer-size $k_mer.k_mer_size
#end if
    $circos
#if str($genes.gene_finding.tool) != 'none'
    $genes.gene_finding.tool
    ## The option values declared in the inputs are '--gene-finding' and '--glimmer';
    ## only these two branches expose the gene_thresholds parameter.
    #if str($genes.gene_finding.tool) == '--gene-finding' or str($genes.gene_finding.tool) == '--glimmer'
        #set $gene_threshold = ','.join([x.strip() for x in str($genes.gene_finding.gene_thresholds).split(',')])
        --gene-thresholds '$gene_threshold'
    #end if
#end if
    $genes.rna_finding
    $genes.conserved_genes_finding
    ## NOTE(review): the inputs also declare al.ambiguity_score, al.fragmented_max_indent,
    ## al.upper_bound_assembly and al.upper_bound_min_con; none of them is referenced here.
    $al.use_all_alignments
    --min-alignment $al.min_alignment
    --min-identity $al.min_identity
    --ambiguity-usage '$al.ambiguity_usage'
    $al.fragmented
    ## Strip whitespace around each threshold so that values such as "0,1000, 500" are accepted.
    #set $contig_thresholds = ','.join([x.strip() for x in str($contig_thresholds).split(',')])
    --contig-thresholds '$contig_thresholds'
    $strict_NA
    --extensive-mis-size $extensive_mis_size
    --scaffold-gap-max-size $scaffold_gap_max_size
    --unaligned-part-size $unaligned_part_size
    $skip_unaligned_mis_contigs

#if str($in.custom) == 'false'
    #for $k in $in.inputs
    '$k'
    #end for
#else
    #for $k in $in.inputs
    '$k.input'
    #end for
#end if

&& mkdir -p '$report_html.files_path'
&& cp outputdir/*.html '$report_html.files_path'
## Copy the Icarus viewers only when the directory was actually produced. The
## previous guard tested the truthiness of the use_ref select, which is a
## non-empty string for both of its values, and a missing directory made the
## whole job fail at the cp step.
&& if [ -d outputdir/icarus_viewers ]; then cp -R outputdir/icarus_viewers '$report_html.files_path'; fi
    ]]>
    </command>
    <inputs>
        <!-- Input assemblies: either a plain multi-dataset selection, or a repeat pairing each dataset with a custom label. -->
        <conditional name="in">
            <param name="custom" type="select" label="Use customized names for the input files?" help="They will be used in reports, plots and logs">
                <option value="true">Yes, specify custom names</option>
                <option value="false" selected="true">No, use dataset names</option>
            </param>
            <when value="true">
                <repeat name="inputs" title="Contigs/scaffolds" min="1">
                    <param name="input" type="data" format="fasta" label="Contigs/scaffolds file"/>
                    <!-- An empty name makes the command template fall back to the dataset identifier. -->
                    <param argument="--labels" type="text" value="" label="Name"/>
                </repeat>
            </when>
            <when value="false">
                <param name="inputs" type="data" format="fasta" multiple="true" label="Contigs/scaffolds file"/>
            </when>
        </conditional>
        <!-- Assembly type selects the executable (quast or metaquast) and the way a reference is provided. -->
        <conditional name="assembly">
            <param name="type" type="select" label="Type of assembly">
                <option value="genome">Genome</option>
                <option value="metagenome">Metagenome</option>
            </param>
            <when value="genome">
                <conditional name="ref">
                    <param name="use_ref" type="select" label="Use a reference genome?" help="Many metrics can't be evaluated without a reference. If this is omitted, QUAST will only report the metrics that can be evaluated without a reference.">
                        <option value="true">Yes</option>
                        <option value="false" selected="true">No</option>
                    </param>
                    <when value="true">
                        <param argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" />
                        <param argument="--features" type="data" format="gff, gff3, bed" optional="true" label="Genomic feature positions in the reference genome" help="Gene coordinates for the reference genome"/>
                        <param argument="--operons" type="data" format="gff, gff3, bed" optional="true" label="Operon positions in the reference genome" help="Operon coordinates for the reference genome"/>
                    </when>
                    <when value="false">
                        <param name="est_ref_size" argument="--est-ref-size" type="integer" optional="true" label="Estimated reference genome size (in bp) for computing NGx statistics" help=""/>
                    </when>
                </conditional>
                <!-- The option value is inserted verbatim into the command line; prokaryotes add no flag. -->
                <param name="orga_type" type="select" label="Type of organism">
                    <option value="">Prokaryotes: use of GeneMarkS for gene finding</option>
                    <option value="--eukaryote">Eukaryote (--eukaryote): use of GeneMark-ES for gene finding, Barrnap for ribosomal RNA genes prediction, BUSCO for conserved orthologs finding</option>
                    <option value="--fungus">Fungus (--fungus): use of GeneMark-ES for gene finding, Barrnap for ribosomal RNA genes prediction, BUSCO for conserved orthologs finding</option>
                </param>
            </when>
            <when value="metagenome">
                <conditional name="ref">
                    <param name="origin" type="select" label="Reference genome" help="Many metrics can't be evaluated without a reference. If this is omitted, QUAST will only report the metrics that can be evaluated without a reference.">
                        <option value="history">From history</option>
                        <option value="list">From list</option>
                        <option value="silva">From SILVA database</option>
                        <option value="none" selected="true">None</option>
                    </param>
                    <when value="history">
                        <param argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" />
                    </when>
                    <when value="list">
                        <param name="references_list" argument="--references-list" type="text" value="" label="Comma-separated list of reference genomes" help="MetaQUAST will search for these references in the NCBI database and will download the found ones"/>
                    </when>
                    <when value="silva">
                        <param name="max_ref_num" argument="--max-ref-num" type="integer" value="50" label="Maximum number of reference genomes (per each assembly) to download after searching in the SILVA database" />
                    </when>
                    <when value="none"/>
                </conditional>
            </when>
        </conditional>
        <!-- Minimum contig length; always passed to the command line. -->
        <param name="min_contig"
               argument="--min-contig"
               type="integer"
               value="500"
               label="Lower threshold for a contig length (in bp)"
               help="Shorter contigs won't be taken into account"/>
        <!-- Boolean flags: the truevalue is inserted verbatim, the falsevalue adds nothing. -->
        <param name="split_scaffolds"
               argument="--split-scaffolds"
               type="boolean"
               truevalue="--split-scaffolds"
               falsevalue=""
               checked="false"
               label="Are assemblies scaffolds rather than contigs?"
               help="QUAST will add split versions of assemblies to the comparison. Assemblies are split by continuous fragments of N's of length >= 10. If broken version is equal to the original assembly (i.e. nothing was split) it is not included in the comparison."/>
        <param argument="--large"
               type="boolean"
               truevalue="--large"
               falsevalue=""
               checked="false"
               label="Is genome large (> 100 Mbp)?"
               help="Use optimal parameters for evaluation of large genomes. Affects speed and accuracy. In particular, imposes --eukaryote --min-contig 3000 --min-alignment 500 --extensive-mis-size 7000 (can be overridden manually with the corresponding options). In addition, this mode tries to identify misassemblies caused by transposable elements and exclude them from the number of misassemblies."/>
        <!-- K-mer metrics: the selected option value doubles as the command line flag, and k is only asked for when enabled. -->
        <conditional name="k_mer">
            <param name="k_mer_stats"
                   argument="--k-mer-stats"
                   type="select"
                   label="Compute k-mer-based quality metrics?"
                   help="It is recommended for large genomes. This may significantly increase memory and time consumption on large genomes">
                <option value="--k-mer-stats">Yes</option>
                <option value="" selected="true">No</option>
            </param>
            <when value="--k-mer-stats">
                <param name="k_mer_size"
                       argument="--k-mer-size"
                       type="integer"
                       value="101"
                       label="Size of k" />
            </when>
            <when value=""/>
        </conditional>
        <!-- Optional Circos plot; the flag is only emitted when checked. -->
        <param argument="--circos"
               type="boolean"
               truevalue="--circos"
               falsevalue=""
               checked="false"
               label="Draw Circos plot?"
               help=""/>
        <!-- Gene, rRNA and conserved ortholog prediction options. -->
        <section name="genes" title="Genes">
            <!-- The selected option value is the flag handed to QUAST; "none" disables gene prediction. -->
            <conditional name="gene_finding">
                <param name="tool"
                       type="select"
                       label="Tool for gene prediction"
                       help="">
                    <option value="none">Don't predict genes</option>
                    <option value="--gene-finding">GeneMarkS if prokaryotes or GeneMark-ES if eukaryotes or fungi</option>
                    <option value="--mgm">MetaGeneMark, specially for metagenomic assembly</option>
                    <option value="--glimmer">Glimmer</option>
                </param>
                <when value="none"/>
                <when value="--gene-finding">
                    <expand macro="gene_thresholds"/>
                </when>
                <when value="--mgm"/>
                <when value="--glimmer">
                    <expand macro="gene_thresholds"/>
                </when>
            </conditional>
            <param name="rna_finding"
                   argument="--rna-finding"
                   type="boolean"
                   truevalue="--rna-finding"
                   falsevalue=""
                   checked="false"
                   label="Enables ribosomal RNA gene finding?"
                   help="By default, we assume that the genome is prokaryotic, and Barrnap uses the bacterial database for rRNA prediction. If the genome is eukaryotic (fungal), use --eukaryote (--fungus) option to force Barrnap to work with the eukaryotic (fungal) database. "/>
            <param name="conserved_genes_finding"
                   argument="--conserved-genes-finding"
                   type="boolean"
                   truevalue="--conserved-genes-finding"
                   falsevalue=""
                   checked="false"
                   label="Enables search for Universal Single-Copy Orthologs using BUSCO?"
                   help="By default, we assume that the genome is prokaryotic, and BUSCO uses the bacterial database of orthologs. If the genome is eukaryotic (fungal), use --eukaryote (--fungus) option to force BUSCO to work with the eukaryotic (fungal) database. "/>
        </section>
        <!-- Alignment filtering and misassembly detection options. -->
        <!-- NOTE(review): ambiguity_score, fragmented_max_indent, upper_bound_assembly and upper_bound_min_con are declared here, but the command template does not reference them. -->
        <section name="al" title="Alignments">
            <param name="use_all_alignments" argument="--use-all-alignments" type="boolean" truevalue="--use-all-alignments" falsevalue="" checked="false" label="Use all alignments as in QUAST v.1.*. to compute genome fraction, # genomic features, # operons metrics?" help="By default, QUAST v.2.0 and higher filters out ambiguous and redundant alignments, keeping only one alignment per contig (or one set of non-overlapping or slightly overlapping alignments)"/>
            <param name="min_alignment" argument="--min-alignment" type="integer" value="65" label="Minimum length of alignment" help="Alignments shorter than this value will be filtered. Note that all alignments shorter than 65 bp will be filtered regardless of this threshold."/>
            <param name="min_identity" argument="--min-identity" type="float" value="95.0" label="Minimum IDY% considered as proper alignment" help="Alignments with IDY% worse than this value will be filtered. Note that all alignments with IDY% less than 80.0% will be filtered regardless of this threshold. "/>
            <param name="ambiguity_usage" argument="--ambiguity-usage" type="select" label="How processing equally good alignments of a contig (probably repeats)?" help="">
                <option value="none">Skip all such alignments</option>
                <option value="one" selected="true">Take only one (the very best one)</option>
                <option value="all">Use all alignments. It can cause a significant increase of # mismatches (repeats are almost always inexact due to accumulated SNPs, indels, etc.). It is useful for metagenomic assemblies where ambiguous alignments might represent homologous sequences of different strains</option>
            </param>
            <param name="ambiguity_score" argument="--ambiguity-score" type="float" value="0.99" min="0.8" max="1.0" label="Score S for defining equally good alignments of a single contig" help="All alignments are sorted by decreasing LEN × IDY% value. All alignments with LEN × IDY% less than S × best(LEN × IDY%) are discarded. "/>
            <!-- Label and help previously duplicated the text of use_all_alignments; they now describe the fragmented reference mode. -->
            <param argument="--fragmented" type="boolean" truevalue="--fragmented" falsevalue="" checked="false" label="Is the reference genome fragmented (e.g. a scaffold reference)?" help="QUAST will try to detect misassemblies caused by the fragmentation and mark them fake (they will be excluded from # misassemblies)."/>
            <param name="fragmented_max_indent" argument="--fragmented-max-indent" type="integer" value="50" label="Mark translocation as fake if both alignments are located no further than N bases from the ends of the reference fragments" help="The value should be less than extensive misassembly size"/>
            <param name="upper_bound_assembly" argument="--upper-bound-assembly" type="boolean" truevalue="--upper-bound-assembly" falsevalue="" checked="false" label="Simulate upper bound assembly based on the reference genome and a given set reads?" help="Mate-pairs or long reads, such as Pacbio SMRT/Oxford Nanopore, are REQUIRED. This assembly is added to the comparison and could be useful for estimating the upper bounds of completeness and contiguity that theoretically can be reached by assembly software from this particular set of reads. The concept is based on the fact that the reference genome cannot be completely reconstructed from raw reads due to long genomic repeats and low covered regions. See Mikheenko et al., 2018 for more details. "/>
            <param name="upper_bound_min_con" argument="--upper-bound-min-con" type="integer" value="2" label="Minimal number of 'connecting reads' needed for joining upper bound contigs into a scaffold" help="This is important for a realistic estimation of genome assembly fragmentation due to long repeats. The default values is 2 for mate-pairs and 1 for long reads (PacBio or Nanopore libraries)."/>
        </section>
        <!-- Reporting thresholds and misassembly classification settings; all are passed on every run. -->
        <param name="contig_thresholds" argument="--contig-thresholds" type="text" value="0,1000" label="Comma-separated list of contig length thresholds (in bp)" help="Used in # contigs ≥ x and total length (≥ x) metrics"/>
        <param name="strict_NA" argument="--strict-NA" type="boolean" truevalue="--strict-NA" falsevalue="" checked="false" label="Break contigs at every misassembly event (including local ones) to compute NAx and NGAx statistics?" help="By default, QUAST breaks contigs only at extensive misassemblies (not local ones)."/>
        <param name="extensive_mis_size" argument="--extensive-mis-size" type="integer" value="1000" min="85" label="Lower threshold for the relocation size (gap or overlap size between left and right flanking sequence)" help="Shorter relocations are considered as local misassemblies. It does not affect other types of extensive misassemblies (inversions and translocations). The default value is 1000 bp. Note that the threshold should be greater than maximum indel length which is 85 bp."/>
        <!-- Default raised from 1000 to 10000 to match the documented default and to stay above the extensive misassembly size. -->
        <param name="scaffold_gap_max_size" argument="--scaffold-gap-max-size" type="integer" value="10000" label="Max allowed scaffold gap length difference for detecting corresponding type of misassemblies" help="Longer inconsistencies are considered as relocations and thus, counted as extensive misassemblies. The default value is 10000 bp. Note that the threshold make sense only if it is greater than extensive misassembly size"/>
        <param name="unaligned_part_size" argument="--unaligned-part-size" type="integer" value="500" label="Lower threshold for detecting partially unaligned contigs" help=""/>
        <!-- Inverted boolean: checked emits nothing, unchecked emits the skip flag. -->
        <param name="skip_unaligned_mis_contigs" argument="--skip-unaligned-mis-contigs" type="boolean" truevalue="" falsevalue="--skip-unaligned-mis-contigs" checked="true" label="Distinguish contigs with more than 50% unaligned bases as a separate group of contigs?" help="By default, QUAST treats such contigs as a separate group and does not count misassemblies in them. Unchecking this option passes --skip-unaligned-mis-contigs."/>
    </inputs>
    <outputs>
        <!-- Reports that are collected for every run. -->
        <data name="quast_tabular"
              format="tabular"
              label="${tool.name} on ${on_string}: tabular report"
              from_work_dir="outputdir/report.tsv"/>
        <data name="report_html"
              format="html"
              label="${tool.name} on ${on_string}:  HTML report"
              from_work_dir="outputdir/report.html"/>
        <data name="report_pdf"
              format="pdf"
              label="${tool.name} on ${on_string}:  PDF report"
              from_work_dir="outputdir/report.pdf"/>
        <data name="log"
              format="txt"
              label="${tool.name} on ${on_string}: Log"
              from_work_dir="outputdir/quast.log"/>
        <!-- Reference-based reports: kept only when a reference was selected. -->
        <data name="mis_ass"
              format="tabular"
              label="${tool.name} on ${on_string}: Misassemblies"
              from_work_dir="outputdir/contigs_reports/misassemblies_report.txt">
            <filter>(assembly['type'] == 'genome' and assembly['ref']['use_ref'] == 'true') or (assembly['type'] == 'metagenome' and assembly['ref']['origin'] != 'none')</filter>
        </data>
        <data name="unalign"
              format="tabular"
              label="${tool.name} on ${on_string}: Unaligned contigs"
              from_work_dir="outputdir/contigs_reports/unaligned_report.tsv">
            <filter>(assembly['type'] == 'genome' and assembly['ref']['use_ref'] == 'true') or (assembly['type'] == 'metagenome' and assembly['ref']['origin'] != 'none')</filter>
        </data>
        <!-- K-mer report: kept only when k-mer statistics were requested. -->
        <data name="kmers"
              format="tabular"
              label="${tool.name} on ${on_string}: K-mer-based metrics"
              from_work_dir="outputdir/k_mer_stats/kmers_report.txt">
            <filter>k_mer['k_mer_stats'] != ''</filter>
        </data>
    </outputs>
    <tests>
        <test>
            <!-- Test with reference and genes annotations -->
            <conditional name="in">
                <param name="custom" value="true"/>
                <repeat name="inputs">
                    <param name="input" value="contigs1.fna"/>
                    <param name="labels" value="contig1"/>
                </repeat>
                <!-- Empty label: exercises the fallback to the dataset identifier. -->
                <repeat name="inputs">
                    <param name="input" value="contigs2.fna"/>
                    <param name="labels" value=""/>
                </repeat>
            </conditional>
            <conditional name="assembly">
                <param name="type" value="genome"/>
                <conditional name="ref">
                    <param name="use_ref" value="true"/>
                    <param name="r" value="reference.fna"/>
                    <param name="features" value="genes.gff"/>
                </conditional>
                <param name="orga_type" value=""/>
            </conditional>
            <param name="min_contig" value="500"/>
            <param name="split_scaffolds" value="false"/>
            <param name="large" value="true"/>
            <conditional name="k_mer">
                <param name="k_mer_stats" value="--k-mer-stats"/>
                <param name="k_mer_size" value="101" />
            </conditional>
            <param name="circos" value="true"/>
            <section name="genes">
                <conditional name="gene_finding">
                    <!-- Must match a declared option value; the underscore spelling is not one of them. -->
                    <param name="tool" value="--gene-finding"/>
                    <param name="gene_thresholds" value="0,300,1500,3000"/>
                </conditional>
                <param name="rna_finding" value="true"/>
                <param name="conserved_genes_finding" value="true"/>
            </section>
            <section name="al">
                <param name="use_all_alignments" value="true"/>
                <param name="min_alignment" value="65"/>
                <param name="min_identity" value="95.0"/>
                <param name="ambiguity_usage" value="one"/>
                <param name="ambiguity_score" value="0.99"/>
                <param name="fragmented" value="true"/>
                <param name="fragmented_max_indent" value="50"/>
                <param name="upper_bound_assembly" value="true"/>
                <param name="upper_bound_min_con" value="2"/>
            </section>
            <param name="contig_thresholds" value="0,1000"/>
            <param name="strict_NA" value="true"/>
            <param name="extensive_mis_size" value="1000"/>
            <param name="scaffold_gap_max_size" value="1000"/>
            <param name="unaligned_part_size" value="500"/>
            <param name="skip_unaligned_mis_contigs" value="true"/>
            <output name="quast_tabular" file="test1.tabular" lines_diff="2"/>
            <output name="mis_ass" file="test1_mis_ass.tabular"/>
            <output name="unalign" file="test1_unalign.tabular"/>
            <output name="kmers" file="test1_kmers.tabular"/>
        </test>
        <test>
            <!-- Test without reference -->
            <conditional name="in">
                <param name="custom" value="false"/>
                <param name="inputs" value="contigs1.fna,contigs2.fna"/>
            </conditional>
            <conditional name="assembly">
                <param name="type" value="genome"/>
                <conditional name="ref">
                    <param name="use_ref" value="false"/>
                </conditional>
                <param name="orga_type" value="--eukaryote"/>
            </conditional>
            <param name="min_contig" value="500"/>
            <param name="split_scaffolds" value="false"/>
            <param name="large" value="false"/>
            <conditional name="k_mer">
                <param name="k_mer_stats" value=""/>
            </conditional>
            <param name="circos" value="false"/>
            <section name="genes">
                <conditional name="gene_finding">
                    <param name="tool" value="none"/>
                </conditional>
                <param name="rna_finding" value="false"/>
                <param name="conserved_genes_finding" value="false"/>
            </section>
            <section name="al">
                <param name="use_all_alignments" value="false"/>
                <param name="min_alignment" value="65"/>
                <param name="min_identity" value="95.0"/>
                <param name="ambiguity_usage" value="one"/>
                <param name="ambiguity_score" value="0.99"/>
                <param name="fragmented" value="false"/>
                <param name="fragmented_max_indent" value="50"/>
                <param name="upper_bound_assembly" value="false"/>
                <param name="upper_bound_min_con" value="2"/>
            </section>
            <!-- The space after the second comma is deliberate: the command template strips it. -->
            <param name="contig_thresholds" value="0,1000, 500"/>
            <param name="strict_NA" value="false"/>
            <param name="extensive_mis_size" value="1000"/>
            <param name="scaffold_gap_max_size" value="1000"/>
            <param name="unaligned_part_size" value="500"/>
            <!-- Explicit boolean instead of the former "-" placeholder. -->
            <param name="skip_unaligned_mis_contigs" value="false"/>
            <output name="quast_tabular" file="test2.tabular"/>
            <output name="report_html" file="test2_report.html" compare="sim_size"/>
            <output name="report_pdf" file="test2_report.pdf" compare="sim_size"/>
        </test>
        <test>
            <!-- Test with metagenomics -->
            <conditional name="in">
                <param name="custom" value="false"/>
                <param name="inputs" value="contigs3.fasta"/>
            </conditional>
            <conditional name="assembly">
                <param name="type" value="metagenome"/>
                <conditional name="ref">
                    <param name="origin" value="none"/>
                </conditional>
            </conditional>
            <param name="min_contig" value="500"/>
            <param name="split_scaffolds" value="false"/>
            <param name="large" value="false"/>
            <conditional name="k_mer">
                <param name="k_mer_stats" value=""/>
            </conditional>
            <param name="circos" value="false"/>
            <section name="genes">
                <conditional name="gene_finding">
                    <param name="tool" value="--mgm"/>
                </conditional>
                <param name="rna_finding" value="false"/>
                <param name="conserved_genes_finding" value="false"/>
            </section>
            <section name="al">
                <param name="use_all_alignments" value="false"/>
                <param name="min_alignment" value="65"/>
                <param name="min_identity" value="95.0"/>
                <param name="ambiguity_usage" value="one"/>
                <param name="ambiguity_score" value="0.99"/>
                <param name="fragmented" value="false"/>
                <param name="fragmented_max_indent" value="50"/>
                <param name="upper_bound_assembly" value="false"/>
                <param name="upper_bound_min_con" value="2"/>
            </section>
            <!-- The space after the second comma is deliberate: the command template strips it. -->
            <param name="contig_thresholds" value="0,1000, 500"/>
            <param name="strict_NA" value="false"/>
            <param name="extensive_mis_size" value="1000"/>
            <param name="scaffold_gap_max_size" value="1000"/>
            <param name="unaligned_part_size" value="500"/>
            <!-- Explicit boolean instead of the former "-" placeholder. -->
            <param name="skip_unaligned_mis_contigs" value="false"/>
            <output name="quast_tabular" file="test3.tabular"/>
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

QUAST = QUality ASsessment Tool. The tool evaluates genome assemblies by computing various metrics.

If you have one or multiple genome assemblies, you can assess their quality with Quast. It works with or without reference genome. If you are new to Quast, start by reading its `manual page <http://quast.bioinf.spbau.ru/manual.html>`_.

**Using Quast without reference**

Without a reference, Quast can calculate a number of assembly-related metrics but cannot provide any information about potential misassemblies, inversions, translocations, etc. Suppose you have three assemblies produced by Unicycler corresponding to three different antibiotic treatments *car*, *pit*, and *cef* (these stand for carbenicillin, piperacillin, and cefsulodin, respectively). Evaluating them without a reference will produce the following Quast outputs:

 * Quast report in HTML format
 * `Contig viewer <http://quast.bioinf.spbau.ru/manual.html#sec3.4>`_ (an HTML file)
 * `Quast report <http://quast.bioinf.spbau.ru/manual.html#sec3.1.1>`_ in Tab-delimited format
 * Quast log (a file with technical information about Quast tool execution)

The **tab delimited Quast report** will contain the following information::

 Assembly                  pit_fna cef_fna car_fna
 # contigs (>= 0 bp)           100      91      94
 # contigs (>= 1000 bp)         62      58      61
 Total length (>= 0 bp)    6480635 6481216 6480271
 Total length (>= 1000 bp) 6466917 6468946 6467103
 # contigs                      71      66      70
 Largest contig             848753  848766  662053
 Total length              6473173 6474698 6473810
 GC (%)                      66.33   66.33   66.33
 N50                        270269  289027  254671
 N75                        136321  136321  146521
 L50                             7       7       8
 L75                            15      15      16
 # N's per 100 kbp            0.00    0.00    0.00

where values are defined as specified in `Quast manual <http://quast.bioinf.spbau.ru/manual.html#sec3.1.1>`_

**Quast report in HTML format** contains graphs in addition to the above metrics, while **Contig viewer** draws contigs ordered from longest to shortest. This ordering is suitable for comparing only largest contigs or number of contigs longer than a specific threshold. The viewer shows N50 and N75 with color and textual indication. If the reference genome is available or at least approximate genome length is known (see `--est-ref-size`), NG50 and NG75 are also shown. You can also tone down contigs shorter than a specified threshold using Icarus control panel:

.. image:: $PATH_TO_IMAGES/contig_view_noR.png
   :width: 558
   :height: 412

Also see `Plot description <http://quast.bioinf.spbau.ru/manual.html#sec3.2>`_ section of the manual.

**Using Quast with reference**

Car, pit, and cef are in fact assemblies of *Pseudomonas aeruginosa* UCBPP-PA14, so we can use its genome as a reference (by supplying a Fasta file containing *P. aeruginosa* pa14 genome to **Reference genome** input box). The following outputs will be produced (note the alignment viewer):

 * Quast report in HTML format
 * `Contig viewer <http://quast.bioinf.spbau.ru/manual.html#sec3.4>`_ (an HTML file)
 * `Alignment viewer <http://quast.bioinf.spbau.ru/manual.html#sec3.4>`_ (an HTML file)
 * `Quast report <http://quast.bioinf.spbau.ru/manual.html#sec3.1.1>`_ in Tab-delimited format
 * Summary of `misassemblies <http://quast.bioinf.spbau.ru/manual.html#sec3.1.2>`_
 * Summary of `unaligned contigs <http://quast.bioinf.spbau.ru/manual.html#sec3.1.3>`_
 * Quast log (a file with technical information about Quast tool execution)

With the reference Quast produces a much more comprehensive set of results::

 Assembly                  pit_fna cef_fna car_fna
 # contigs (>= 0 bp)           100      91      94
 # contigs (>= 1000 bp)         62      58      61
 Total length (>= 0 bp)    6480635 6481216 6480271
 Total length (>= 1000 bp) 6466917 6468946 6467103
 # contigs                      71      66      70
 Largest contig             848753  848766  662053
 Total length              6473173 6474698 6473810
 Reference length          6537648 6537648 6537648
 GC (%)                      66.33   66.33   66.33
 Reference GC (%)            66.29   66.29   66.29
 N50                        270269  289027  254671
 NG50                       270269  289027  254671
 N75                        136321  136321  146521
 NG75                       136321  136321  136321
 L50                             7       7       8
 LG50                            7       7       8
 L75                            15      15      16
 LG75                           15      15      17
 # misassemblies                 0       0       0
 # misassembled contigs          0       0       0
 Misassembled contigs length     0       0       0
 # local misassemblies           1       1       2
 # unaligned mis. contigs        0       0       0
 # unaligned contigs         0 + 0   0 + 0   0 + 0
                              part    part    part
 Unaligned length                0       0       0
 Genome fraction (%)        99.015  99.038  99.025
 Duplication ratio           1.000   1.000   1.000
 # N's per 100 kbp            0.00    0.00    0.00
 # mismatches per 100 kbp     3.82    3.63    3.49
 # indels per 100 kbp         1.19    1.13    1.13
 Largest alignment          848753  848766  662053
 Total aligned length      6473163 6474660 6473792
 NA50                       270269  289027  254671
 NGA50                      270269  289027  254671
 NA75                       136321  136321  146521
 NGA75                      136321  136321  136321
 LA50                            7       7       8
 LGA50                           7       7       8
 LA75                           15      15      16
 LGA75                          15      15      17

where, again, values are defined as specified in `Quast manual <http://quast.bioinf.spbau.ru/manual.html#sec3.1.1>`_. You can see that this report includes a variety of data that can only be computed against a reference assembly.

Using a reference also produces an **Alignment viewer**:

.. image:: $PATH_TO_IMAGES/Align_view.png
   :width: 515
   :height: 395

The alignment viewer highlights regions of interest such as, in this case, misassemblies that can potentially point to genome rearrangements (see more `here <http://quast.bioinf.spbau.ru/manual.html#sec3.4>`_).

    ]]>
    </help>

    <citations>
        <!-- BibTeX entries: authors are joined with "and", bibliographic data sits in its own fields, and braces are balanced. -->
        <citation type="bibtex">@ARTICLE{a1,
            author = {Alexey Gurevich and Vladislav Saveliev and Nikolay Vyahhi and Glenn Tesler},
            title = {{QUAST}: quality assessment tool for genome assemblies},
            journal = {Bioinformatics},
            year = {2013},
            volume = {29},
            number = {8},
            pages = {1072--1075},
            doi = {10.1093/bioinformatics/btt086}
        }</citation>
        <citation type="bibtex">@misc{quast41,
            title = {{Quast} v4.1},
            howpublished = {http://bioinf.spbau.ru/quast},
            note = {Released May 2016}
        }</citation>
    </citations>
</tool>
author	iuc
date	Wed, 24 Jul 2019 03:29:15 -0400
parents	f30d03867854
children	ebb0dcdb621a