view nextclade.xml @ 19:128ba8da994f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nextclade commit c41319b6593caa0a11835f5e8bc48b1f1e316ff8
author iuc
date Sat, 08 Oct 2022 20:03:50 +0000
parents feb40665d7cd
children 4f6349228462
line wrap: on
line source

<tool id="nextclade" name="Nextclade" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT">
    <description>Viral genome clade assignment, mutation calling, and sequence quality checks</description>
    <macros>
        <import>macros.xml</import>
        <!-- Wrapper revision; appended after "+galaxy" in the version attribute of the tool element. -->
        <token name="@VERSION_SUFFIX@">0</token>
        <!-- The COMPATIBILITY_SPEC token is applied as a regexp filter on column 3 of the
             cached database table, so that only usable database entries are offered.
             sars-cov-2 and influenza databases are compatible with nextclade 1+, MPXV ones
             with nextclade 2+; at time of writing this tool is for nextclade version 2.
             The regexp accepts any value starting with "1." and the values 2.0 through 2.4
             (optionally followed by further dot-separated components). Everything else,
             including 2.5 and later as well as a future version 3, is filtered out.
             NOTE(review): confirm that the upper bound of 2.4 is still intended for the
             nextclade version this wrapper currently installs. -->
        <token name="@COMPATIBILITY_SPEC@"><![CDATA[(^1\..+$|^2\.[0-4](\..+)*$)]]></token>
    </macros>
    <requirements>
        <!-- The wrapped program, pinned to the tool version token. -->
        <requirement version="@TOOL_VERSION@" type="package">nextclade</requirement>
        <!-- Presumably required for the tail call made by the command section. -->
        <requirement version="9.0" type="package">coreutils</requirement>
    </requirements>
    <!-- Command whose output is recorded as the version of the underlying program. -->
    <version_command>nextclade --version-detailed</version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## Stage the nextclade dataset as ./db: link the cached copy chosen by the
        ## user, or fetch the latest release for the selected organism.
        #if $db.source == "cached"
            ln -s '${db.release.fields.path}' db &&
        #elif $db.source == "download"
            nextclade dataset get -n '${organism}' -o db &&
        #end if
        ## The QUERY_FASTA token comes from the imported macros; it is expected to
        ## define the query variable that is handed to nextclade further down.
        @QUERY_FASTA@
        nextclade run
        --input-dataset db/
        ## Optional user-supplied files from the advanced section; each one is only
        ## passed when a dataset was actually selected.
        #if str($adv.advanced_options) == 'yes'
            #if $adv.input_qc_config
                --input-qc-config '$adv.input_qc_config'
            #end if
            #if $adv.input_root_seq
                --input-root-seq '$adv.input_root_seq'
            #end if
            #if $adv.input_tree
                --input-tree '$adv.input_tree'
            #end if
            #if $adv.input_gene_map
                --input-gene-map '$adv.input_gene_map'
            #end if
            #if $adv.input_pcr_primers
                --input-pcr-primers '$adv.input_pcr_primers'
            #end if
            #if $adv.input_virus_properties
                --input-virus-properties '$adv.input_virus_properties'
            #end if
        #end if
        ## Tabular report: written straight to the output dataset when the header
        ## line is wanted, otherwise to a scratch file whose first line is dropped
        ## after nextclade has finished (see the tail call at the end).
        #if $outputs and "report_tsv" in $outputs
            #if $include_header
                --output-tsv '$report_tsv'
            #else
                --output-tsv report.tsv
            #end if
        #end if
        #if $outputs and "report_json" in $outputs
            --output-json '${report_json}'
        #end if
        #if $outputs and "output_tree" in $outputs
            --output-tree '${output_tree}'
        #end if
        ## Aligned FASTA; the include_reference flag only exists in the advanced
        ## section, so it is only emitted when that section is enabled.
        #if $outputs and "output_fasta" in $outputs
            --output-fasta '${output_fasta}'
            #if str($adv.advanced_options) == 'yes'
                $adv.include_reference
            #end if
        #end if
        ## Single-quoted like every other path on this command line.
        '$query'
        #if $outputs and "report_tsv" in $outputs and not $include_header
            && tail -n+2 report.tsv >'$report_tsv'
        #end if
    ]]></command>
    <inputs>
        <!-- Query sequences; plain and gzip-compressed FASTA are both accepted. -->
        <param argument="--input-fasta" type="data" format="fasta,fasta.gz" label="FASTA file with input sequences"/>
        <!-- The option value is used as the dataset name when downloading a database and is
             matched against column 1 of the cached database table. It also selects the
             column metadata attached to the tabular report in the outputs section. -->
        <param name="organism" type="select" label="Organism">
            <option value="sars-cov-2" selected="true">SARS-CoV-2</option>
            <option value="sars-cov-2-no-recomb">SARS-CoV-2 without recombinants</option>
            <option value="sars-cov-2-21L">SARS-CoV-2 relative to BA.2</option>
            <option value="flu_h1n1pdm_ha">Influenza A H1N1pdm HA</option>
            <option value="flu_h3n2_ha">Influenza A H3N2 HA</option>
            <option value="flu_vic_ha">Influenza B Victoria HA</option>
            <option value="flu_yam_ha">Influenza B Yamagata HA</option>
            <option value="MPXV">Monkeypox (All Clades - MPXV)</option>
            <option value="hMPXV">Human Monkeypox (hMPXV)</option>
            <option value="hMPXV_B1">Human Monkeypox Clade B.1 (hMPXV_B1)</option>
        </param>
        <!-- Where the nextclade database comes from: a copy cached on the server or a fresh download. -->
        <conditional name="db">
            <param name="source" type="select" label="Version of database to use">
                <option value="cached" selected="true">Use specific database version cached on this Galaxy server</option>
                <option value="download">Download latest available database version from web</option>
            </param>
            <when value="cached">
                <!-- Rows come from the "nextclade" data table. They are sorted by column 4,
                     restricted to the selected organism (column 1) and to rows whose column 3
                     matches the COMPATIBILITY_SPEC regexp. The path column is what the command
                     section links into the working directory. -->
                <param name="release" label="Cached nextclade database release" type="select">
                    <options from_data_table="nextclade">
                        <column name="value" index="0"/>
                        <column name="description" index="2"/>
                        <column name="date" index="4"/>
                        <column name="path" index="5"/>
                        <filter type="sort_by" column="4"/>
                        <filter type="param_value" ref="organism" column="1"/>
                        <filter type="regexp" column="3" value="@COMPATIBILITY_SPEC@"/>
                    </options>
                </param>
            </when>
            <when value="download"/>
        </conditional>
        <!-- Each selected value switches on the output dataset of the same name. -->
        <param name="outputs" type="select" label="Output options" help="Output reports and optionally tree" multiple="true">
            <option value="report_json">JSON format report</option>
            <option value="report_tsv" selected="true">Tabular format report</option>
            <option value="output_tree">Auspice v2 tree file (JSON format)</option>
            <option value="output_fasta">Aligned sequences (FASTA format)</option>
        </param>
        <!-- Only consulted for the tabular report; when false the command section strips the first line. -->
        <param name="include_header" type="boolean" label="Include header line in output file" truevalue="true" falsevalue="false" help="Only affects the tabular format report. When disabled, the header line is removed from that report."/>
        <conditional name="adv">
            <param name="advanced_options" type="select" label="Use advanced options">
                <option value="yes">Yes</option>
                <option value="no" selected="true">No</option>
            </param>
            <when value="yes">
                <!-- All files below are optional and are only passed to nextclade when supplied. -->
                <param argument="--input-qc-config" type="data" label="Quality Control (QC) Config file" format="json" optional="true" help="QC config json file containing custom QC configuration"/>
                <param argument="--input-root-seq" type="data" label="Custom root sequence" format="txt" optional="true" help="Text file containing custom root sequence"/>
                <param argument="--input-tree" type="data" label="Custom reference tree" format="json" optional="true" help="Auspice JSON v2 file containing custom reference tree"/>
                <!-- NOTE(review): confirm that JSON is the gene map format expected by the wrapped nextclade version. -->
                <param argument="--input-gene-map" type="data" label="Custom gene map" format="json" optional="true" help="JSON file containing custom gene map. Gene map (sometimes also called 'gene annotations') is used to resolve aminoacid changes in genes"/>
                <!-- Declared as csv so that the accepted datatype agrees with the help text, which describes a CSV file. -->
                <param argument="--input-pcr-primers" type="data" label="Custom PCR primer sites" format="csv" optional="true" help="CSV file containing a list of custom PCR primer sites. These are used to report mutations in these sites"/>
                <param argument="--input-virus-properties" type="data" label="Configuration and data specific to a pathogen" format="json" optional="true" help="For more info on the virus properties JSON file, see the Nextclade documentation."/>
                <!-- The truevalue is inserted verbatim into the command line when the FASTA output is requested. -->
                <param argument="--include-reference" truevalue="--include-reference" falsevalue="" type="boolean" label="Include reference sequence in FASTA alignment output" help="The FASTA alignment is an optional output of nextclade. Select this option to include the reference sequence in that alignment file"/>
            </when>
            <when value="no">
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Every dataset below is created only when its key is selected in the "outputs" parameter. -->
        <data format="tabular"
              name="report_tsv"
              label="${tool.name} on ${on_string} (TSV report)">
            <filter>outputs and "report_tsv" in outputs</filter>
            <actions>
                <!-- Column metadata is chosen by organism through the column_metadata macro,
                     which is not defined in this file. The names given in extra_columns show up
                     right after "clade" in the column_names metadata checked by the tests. -->
                <conditional name="organism">
                    <expand macro="column_metadata" dataset_name="sars-cov-2" extra_columns="Nextclade_pango,"/>
                    <expand macro="column_metadata" dataset_name="sars-cov-2-no-recomb" extra_columns="Nextclade_pango,"/>
                    <expand macro="column_metadata" dataset_name="sars-cov-2-21L" extra_columns="Nextclade_pango,partiallyAliased,immune_escape,ace2_binding,"/>
                    <expand macro="column_metadata" dataset_name="flu_h1n1pdm_ha"/>
                    <expand macro="column_metadata" dataset_name="flu_h3n2_ha"/>
                    <expand macro="column_metadata" dataset_name="flu_vic_ha"/>
                    <expand macro="column_metadata" dataset_name="flu_yam_ha"/>
                    <expand macro="column_metadata" dataset_name="MPXV" extra_columns="outbreak,lineage,"/>
                    <expand macro="column_metadata" dataset_name="hMPXV" extra_columns="outbreak,lineage,"/>
                    <expand macro="column_metadata" dataset_name="hMPXV_B1" extra_columns="outbreak,lineage,"/>
                </conditional>
            </actions>
        </data>
        <data format="json"
              name="report_json"
              label="${tool.name} on ${on_string} (JSON report)">
            <filter>outputs and "report_json" in outputs</filter>
        </data>
        <data format="json"
              name="output_tree"
              label="${tool.name} on ${on_string} (Auspice v2 tree)">
            <filter>outputs and "output_tree" in outputs</filter>
        </data>
        <data format="fasta"
              name="output_fasta"
              label="${tool.name} on ${on_string} (FASTA alignment)">
            <filter>outputs and "output_fasta" in outputs</filter>
        </data>
    </outputs>
    <tests>
        <!-- Minimal run: SARS-CoV-2 input, database downloaded at run time, tabular report only.
             include_header is not set here, so its default applies. -->
        <test expect_num_outputs="1">
            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta"/>
            <conditional name="db">
                <param name="source" value="download"/>
            </conditional>
            <param name="outputs" value="report_tsv"/>
            <param name="organism" value="sars-cov-2"/>
            <output name="report_tsv">
                <assert_contents>
                    <has_n_columns n="67"/>
                    <has_text text="20A"/>
                </assert_contents>
            </output>
        </test>
        <!-- Three outputs at once: checks the tabular report and its column_names metadata
             (with Nextclade_pango after clade), the JSON report and the Auspice tree. -->
        <test expect_num_outputs="3">
            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta"/>
            <conditional name="db">
                <param name="source" value="download"/>
            </conditional>
            <param name="outputs" value="report_tsv,report_json,output_tree"/>
            <param name="organism" value="sars-cov-2"/>
            <output name="report_tsv">
                <assert_contents>
                    <has_n_columns n="67"/>
                    <has_text text="20A"/>
                </assert_contents>
                <metadata name="column_names" value="seqName,clade,Nextclade_pango,qc.overallScore,qc.overallStatus,totalSubstitutions,totalDeletions,totalInsertions,totalFrameShifts,totalAminoacidSubstitutions,totalAminoacidDeletions,totalAminoacidInsertions,totalMissing,totalNonACGTNs,totalPcrPrimerChanges,substitutions,deletions,insertions,privateNucMutations.reversionSubstitutions,privateNucMutations.labeledSubstitutions,privateNucMutations.unlabeledSubstitutions,privateNucMutations.totalReversionSubstitutions,privateNucMutations.totalLabeledSubstitutions,privateNucMutations.totalUnlabeledSubstitutions,privateNucMutations.totalPrivateSubstitutions,frameShifts,aaSubstitutions,aaDeletions,aaInsertions,missing,nonACGTNs,pcrPrimerChanges,alignmentScore,alignmentStart,alignmentEnd,coverage,qc.missingData.missingDataThreshold,qc.missingData.score,qc.missingData.status,qc.missingData.totalMissing,qc.mixedSites.mixedSitesThreshold,qc.mixedSites.score,qc.mixedSites.status,qc.mixedSites.totalMixedSites,qc.privateMutations.cutoff,qc.privateMutations.excess,qc.privateMutations.score,qc.privateMutations.status,qc.privateMutations.total,qc.snpClusters.clusteredSNPs,qc.snpClusters.score,qc.snpClusters.status,qc.snpClusters.totalSNPs,qc.frameShifts.frameShifts,qc.frameShifts.totalFrameShifts,qc.frameShifts.frameShiftsIgnored,qc.frameShifts.totalFrameShiftsIgnored,qc.frameShifts.score,qc.frameShifts.status,qc.stopCodons.stopCodons,qc.stopCodons.totalStopCodons,qc.stopCodons.score,qc.stopCodons.status,isReverseComplement,failedGenes,warnings,errors"/>
            </output>
            <output name="report_json">
                <assert_contents>
                    <has_text text="&quot;pos&quot;: 240,"/>
                </assert_contents>
            </output>
            <output name="output_tree">
                <assert_contents>
                    <has_text text="&quot;title&quot;: &quot;QC Status&quot;"/>
                </assert_contents>
            </output>
        </test>
        <!-- Advanced options with a custom QC config (strict_qc.json); the report is expected
             to contain the text "mediocre". -->
        <test expect_num_outputs="1">
            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta"/>
            <conditional name="db">
                <param name="source" value="download"/>
            </conditional>
            <param name="outputs" value="report_tsv"/>
            <param name="organism" value="sars-cov-2"/>
            <conditional name="adv">
                <param name="advanced_options" value="yes"/>
                <param name="input_qc_config" value="strict_qc.json" ftype="json"/>
            </conditional>
            <output name="report_tsv">
                <assert_contents>
                    <has_n_columns n="67"/>
                    <has_text text="mediocre"/>
                </assert_contents>
            </output>
        </test>
        <!-- Aligned FASTA output with the reference sequence included, compared against
             output_alignment.fasta.
             NOTE(review): this test asserts the same "mediocre" text as the custom QC config
             test although no QC config is supplied here; confirm that this is intended. -->
        <test expect_num_outputs="2">
            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta"/>
            <conditional name="db">
                <param name="source" value="download"/>
            </conditional>
            <param name="outputs" value="report_tsv,output_fasta"/>
            <param name="organism" value="sars-cov-2"/>
            <conditional name="adv">
                <param name="advanced_options" value="yes"/>
                <param name="include_reference" value="true"/>
            </conditional>
            <output name="report_tsv">
                <assert_contents>
                    <has_n_columns n="67"/>
                    <has_text text="mediocre"/>
                </assert_contents>
            </output>
            <output name="output_fasta" value="output_alignment.fasta" ftype="fasta"/>
        </test>
        <!-- Gzip-compressed monkeypox input (ftype fasta.gz) run against the MPXV dataset;
             checks the 68 column report and that outbreak and lineage follow clade in the
             column_names metadata. -->
        <test expect_num_outputs="1">
            <param name="input_fasta" value="mpxv.fasta.gz" ftype="fasta.gz"/>
            <conditional name="db">
                <param name="source" value="download"/>
            </conditional>
            <param name="outputs" value="report_tsv"/>
            <param name="organism" value="MPXV"/>
            <output name="report_tsv">
                <assert_contents>
                    <has_n_columns n="68"/>
                    <has_text text="hMPXV-1"/>
                </assert_contents>
                <metadata name="column_names" value="seqName,clade,outbreak,lineage,qc.overallScore,qc.overallStatus,totalSubstitutions,totalDeletions,totalInsertions,totalFrameShifts,totalAminoacidSubstitutions,totalAminoacidDeletions,totalAminoacidInsertions,totalMissing,totalNonACGTNs,totalPcrPrimerChanges,substitutions,deletions,insertions,privateNucMutations.reversionSubstitutions,privateNucMutations.labeledSubstitutions,privateNucMutations.unlabeledSubstitutions,privateNucMutations.totalReversionSubstitutions,privateNucMutations.totalLabeledSubstitutions,privateNucMutations.totalUnlabeledSubstitutions,privateNucMutations.totalPrivateSubstitutions,frameShifts,aaSubstitutions,aaDeletions,aaInsertions,missing,nonACGTNs,pcrPrimerChanges,alignmentScore,alignmentStart,alignmentEnd,coverage,qc.missingData.missingDataThreshold,qc.missingData.score,qc.missingData.status,qc.missingData.totalMissing,qc.mixedSites.mixedSitesThreshold,qc.mixedSites.score,qc.mixedSites.status,qc.mixedSites.totalMixedSites,qc.privateMutations.cutoff,qc.privateMutations.excess,qc.privateMutations.score,qc.privateMutations.status,qc.privateMutations.total,qc.snpClusters.clusteredSNPs,qc.snpClusters.score,qc.snpClusters.status,qc.snpClusters.totalSNPs,qc.frameShifts.frameShifts,qc.frameShifts.totalFrameShifts,qc.frameShifts.frameShiftsIgnored,qc.frameShifts.totalFrameShiftsIgnored,qc.frameShifts.score,qc.frameShifts.status,qc.stopCodons.stopCodons,qc.stopCodons.totalStopCodons,qc.stopCodons.score,qc.stopCodons.status,isReverseComplement,failedGenes,warnings,errors"/>
            </output>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**What it does**

Nextclade_ assigns clades, calls mutations and performs sequence quality checks on viral genomes. This tool offers datasets for SARS-CoV-2, influenza and monkeypox virus.

For a description of nextclade's configuration files, see the `nextclade documentation <https://docs.nextstrain.org/projects/nextclade/en/latest/index.html>`_.

**Input**

Input is a FASTA file (optionally gzip-compressed) containing one or more consensus genomes of the selected organism.

**Output**

Outputs can include:

* A tabular format file with a report, one line per input sequence

* A JSON format file with the same information as is present in the tabular report

* An Auspice v2 tree file in JSON format

* A FASTA format file with the aligned sequences, optionally including the reference sequence

.. _Nextclade: https://github.com/nextstrain/nextclade

    ]]></help>
    <!-- Citation entries are supplied by the "citations" macro, which is not defined in this file. -->
    <expand macro="citations"/>
</tool>