view nextclade.xml @ 18:feb40665d7cd draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nextclade commit e72bef3964ce9d0c52ef2713d177684dfbee4cfb
author iuc
date Wed, 31 Aug 2022 16:40:29 +0000
parents 07ab9bd68a02
children 128ba8da994f
line wrap: on
line source

<tool id="nextclade" name="Nextclade" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT">
    <description>Viral genome clade assignment, mutation calling, and sequence quality checks</description>
    <macros>
        <import>macros.xml</import>
        <token name="@VERSION_SUFFIX@">0</token>
        <!-- Regular expression applied by the "regexp" filter on column 3 of the
             nextclade data table to restrict which cached database releases are offered.
             sars-cov-2 and influenza databases are compatible with nextclade 1+, MPXV ones
             with nextclade 2+. At the time of writing this tool wraps nextclade version 2,
             so the expression accepts any 1.x value as well as 2.0 through 2.4 (each
             optionally followed by further dot-separated components). Everything else is
             rejected, which covers a future nextclade version 3 but also 2.5 and above:
             the [0-4] range has to be revisited whenever the wrapped version is bumped.
             NOTE(review): column 3 presumably holds the minimum nextclade version a
             database release requires; confirm against the data manager. -->
        <token name="@COMPATIBILITY_SPEC@"><![CDATA[(^1\..+$|^2\.[0-4](\..+)*$)]]></token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">nextclade</requirement>
        <!-- coreutils supplies the tail command the job uses to strip the TSV header line -->
        <requirement type="package" version="9.0">coreutils</requirement>
    </requirements>
    <!-- Command used to report the version of the nextclade binary -->
    <version_command>nextclade --version-detailed</version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## Stage the reference dataset as ./db: either link the cached release
        ## picked by the user or fetch the latest one for the selected organism.
        #if $db.source == "cached"
            ln -s '${db.release.fields.path}' db &&
        #elif $db.source == "download"
            nextclade dataset get -n '${organism}' -o db &&
        #end if
        ## The macro token on the next line comes from macros.xml; it is expected
        ## to prepare the input sequences and define the query variable used below.
        @QUERY_FASTA@
        nextclade run
        --input-dataset db/
        ## Optional user-supplied files that override parts of the dataset.
        #if str($adv.advanced_options) == "yes"
            #if $adv.input_qc_config
                --input-qc-config '$adv.input_qc_config'
            #end if
            #if $adv.input_root_seq
                --input-root-seq '$adv.input_root_seq'
            #end if
            #if $adv.input_tree
                --input-tree '$adv.input_tree'
            #end if
            #if $adv.input_gene_map
                --input-gene-map '$adv.input_gene_map'
            #end if
            #if $adv.input_pcr_primers
                --input-pcr-primers '$adv.input_pcr_primers'
            #end if
            #if $adv.input_virus_properties
                --input-virus-properties '$adv.input_virus_properties'
            #end if
        #end if
        ## Only the outputs ticked by the user are requested from nextclade.
        #if $outputs and "report_tsv" in $outputs
            #if $include_header
                --output-tsv '$report_tsv'
            #else
                ## Written to a scratch file first; the header is dropped at the end.
                --output-tsv report.tsv
            #end if
        #end if
        #if $outputs and "report_json" in $outputs
            --output-json '${report_json}'
        #end if
        #if $outputs and "output_tree" in $outputs
            --output-tree '${output_tree}'
        #end if
        #if $outputs and "output_fasta" in $outputs
            ## Single quotes, like every other path on this command line.
            --output-fasta '${output_fasta}'
            #if str($adv.advanced_options) == "yes"
                $adv.include_reference
            #end if
        #end if
        $query
        ## Header-less report: copy everything but the first line of the scratch file.
        #if $outputs and "report_tsv" in $outputs and not $include_header
            && tail -n+2 report.tsv >'$report_tsv'
        #end if
    ]]></command>
    <inputs>
        <param argument="--input-fasta" type="data" format="fasta,fasta.gz" label="FASTA file with input sequences" />
        <!-- The selected value is passed as dataset name to "nextclade dataset get"
             and also filters the cached database releases offered below. -->
        <param name="organism" type="select" label="Organism">
            <option value="sars-cov-2" selected="true">SARS-CoV-2</option>
            <option value="sars-cov-2-no-recomb">SARS-CoV-2 without recombinants</option>
            <option value="flu_h1n1pdm_ha">Influenza A H1N1pdm HA</option>
            <option value="flu_h3n2_ha">Influenza A H3N2 HA</option>
            <option value="flu_vic_ha">Influenza B Victoria HA</option>
            <option value="flu_yam_ha">Influenza B Yamagata HA</option>
            <option value="MPXV">Monkeypox (All Clades)</option>
            <option value="hMPXV">Human Monkeypox (hMPXV)</option>
            <option value="hMPXV_B1">Human Monkeypox Clade B.1</option>
        </param>
        <conditional name="db">
            <param name="source" type="select" label="Version of database to use">
                <option value="cached" selected="true">Use specific database version cached on this Galaxy server</option>
                <option value="download">Download latest available database version from web</option>
            </param>
            <when value="cached">
                <!-- Releases come from the nextclade data table, sorted on column 4,
                     limited to the selected organism (column 1) and to entries whose
                     column 3 matches the compatibility regular expression. -->
                <param name="release" label="Cached nextclade database release" type="select">
                    <options from_data_table="nextclade">
                        <column name="value" index="0" />
                        <column name="description" index="2" />
                        <column name="date" index="4" />
                        <column name="path" index="5" />
                        <filter type="sort_by" column="4" />
                        <filter type="param_value" ref="organism" column="1" />
                        <filter type="regexp" column="3" value="@COMPATIBILITY_SPEC@" />
                    </options>
                </param>
            </when>
            <when value="download"></when>
        </conditional>
        <!-- At least one output has to be requested: every dataset in the outputs
             section is filtered on this selection, so an empty selection would
             leave the job with nothing to produce. -->
        <param name="outputs" type="select" label="Output options" help="Output reports and optionally tree" multiple="true" optional="false">
            <option value="report_json">JSON format report</option>
            <option value="report_tsv" selected="true">Tabular format report</option>
            <option value="output_tree">Auspice v2 tree file (JSON format)</option>
            <option value="output_fasta">Aligned sequences (FASTA format)</option>
        </param>
        <!-- Consulted by the command only when the tabular report is requested -->
        <param name="include_header" type="boolean" label="Include header line in output file"
            truevalue="true" falsevalue="false" checked="false" help="Only affects the tabular format report" />
        <conditional name="adv">
            <param name="advanced_options" type="select" label="Use advanced options" >
                <option value="yes">Yes</option>
                <option value="no" selected="true">No</option>
            </param>
            <when value="yes">
                <param argument="--input-qc-config" type="data" label="Quality Control (QC) Config file" format="json" optional="true" help="QC config json file containing custom QC configuration" />
                <param argument="--input-root-seq" type="data" label="Custom root sequence" format="txt" optional="true" help="Text file containing custom root sequence" />
                <param argument="--input-tree" type="data" label="Custom reference tree" format="json" optional="true" help="Auspice JSON v2 file containing custom reference tree" />
                <!-- NOTE(review): nextclade 2 appears to document the gene map as a GFF3 file;
                     confirm whether format and help should say gff3 instead of json. -->
                <param argument="--input-gene-map" type="data" label="Custom gene map" format="json" optional="true" help="JSON file containing custom gene map. Gene map (sometimes also called 'gene annotations') is used to resolve aminoacid changes in genes" />
                <!-- The primer list is a CSV file, so csv (not json) datasets must be selectable here -->
                <param argument="--input-pcr-primers" type="data" label="Custom PCR primer sites" format="csv" optional="true" help="CSV file containing a list of custom PCR primer sites. These are used to report mutations in these sites" />
                <param argument="--input-virus-properties" type="data" label="Configuration and data specific to a pathogen" format="json" optional="true" help="For more info on the virus properties JSON file, see the Nextclade documentation." />
                <param argument="--include-reference" truevalue="--include-reference" falsevalue="" type="boolean" label="Include reference sequence in FASTA alignment output" help="The FASTA alignment is an optional output of nextclade. Select this option to include the reference sequence in that alignment file" />
            </when>
            <when value="no">
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Each dataset below is only created when its key is part of the
             multi-select "outputs" parameter (see the filter expressions). -->
        <data name="report_tsv" format="tabular" label="${tool.name} on ${on_string} (TSV report)">            
            <filter>outputs and "report_tsv" in outputs</filter>
            <actions>
                <!-- Static list of 66 column names attached as metadata to the tabular report.
                     NOTE(review): the MPXV test in this file expects 67 columns, so this list
                     may not line up with the report of every organism; confirm. -->
                <action name="column_names" type="metadata" default="seqName,clade,Nextclade_pango,qc.overallScore,qc.overallStatus,totalSubstitutions,totalDeletions,totalInsertions,totalFrameShifts,totalAminoacidSubstitutions,totalAminoacidDeletions,totalAminoacidInsertions,totalMissing,totalNonACGTNs,totalPcrPrimerChanges,substitutions,deletions,insertions,privateNucMutations.reversionSubstitutions,privateNucMutations.labeledSubstitutions,privateNucMutations.unlabeledSubstitutions,privateNucMutations.totalReversionSubstitutions,privateNucMutations.totalLabeledSubstitutions,privateNucMutations.totalUnlabeledSubstitutions,privateNucMutations.totalPrivateSubstitutions,frameShifts,aaSubstitutions,aaDeletions,aaInsertions,missing,nonACGTNs,pcrPrimerChanges,alignmentScore,alignmentStart,alignmentEnd,qc.missingData.missingDataThreshold,qc.missingData.score,qc.missingData.status,qc.missingData.totalMissing,qc.mixedSites.mixedSitesThreshold,qc.mixedSites.score,qc.mixedSites.status,qc.mixedSites.totalMixedSites,qc.privateMutations.cutoff,qc.privateMutations.excess,qc.privateMutations.score,qc.privateMutations.status,qc.privateMutations.total,qc.snpClusters.clusteredSNPs,qc.snpClusters.score,qc.snpClusters.status,qc.snpClusters.totalSNPs,qc.frameShifts.frameShifts,qc.frameShifts.totalFrameShifts,qc.frameShifts.frameShiftsIgnored,qc.frameShifts.totalFrameShiftsIgnored,qc.frameShifts.score,qc.frameShifts.status,qc.stopCodons.stopCodons,qc.stopCodons.totalStopCodons,qc.stopCodons.score,qc.stopCodons.status,isReverseComplement,failedGenes,warnings,errors" />
            </actions>
        </data>
        <data name="report_json" format="json" label="${tool.name} on ${on_string} (JSON report)">
            <filter>outputs and "report_json" in outputs</filter>
        </data>
        <data name="output_tree" format="json" label="${tool.name} on ${on_string} (Auspice v2 tree)">
            <filter>outputs and "output_tree" in outputs</filter>
        </data>
        <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string} (FASTA alignment)">
            <filter>outputs and "output_fasta" in outputs</filter>
        </data>
    </outputs>
    <tests>
        <!-- All tests use the "download" database source, so they need network access. -->
        <!-- Test 1: SARS-CoV-2 input, tabular report only -->
        <test expect_num_outputs="1">
            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta" />
            <conditional name="db">
                <param name="source" value="download" />
            </conditional>
            <param name="outputs" value="report_tsv" />
            <param name="organism" value="sars-cov-2" />
            <output name="report_tsv">
                <assert_contents>
                    <has_n_columns n="66" />
                    <has_text text="20A" />
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: tabular report, JSON report and Auspice tree requested together -->
        <test expect_num_outputs="3">
            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta" />
            <conditional name="db">
                <param name="source" value="download" />
            </conditional>
            <param name="outputs" value="report_tsv,report_json,output_tree" />
            <param name="organism" value="sars-cov-2" />
            <output name="report_tsv">
                <assert_contents>
                    <has_n_columns n="66" />
                    <has_text text="20A" />
                </assert_contents>
            </output>
            <output name="report_json">
                <assert_contents>
                    <has_text text='"pos": 240,' />
                </assert_contents>
            </output>
            <output name="output_tree">
                <assert_contents>
                    <has_text text='"title": "QC Status"' />
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: advanced options with a custom QC configuration file -->
        <test expect_num_outputs="1">
            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta" />
            <conditional name="db">
                <param name="source" value="download" />
            </conditional>
            <param name="outputs" value="report_tsv" />
            <param name="organism" value="sars-cov-2" />
            <conditional name="adv">
                <param name="advanced_options" value="yes" />
                <param name="input_qc_config" value="strict_qc.json" ftype="json" />
            </conditional>
            <output name="report_tsv">
                <assert_contents>
                    <has_n_columns n="66" />
                    <has_text text="mediocre" />
                </assert_contents>
            </output>
        </test>
        <!-- Test 4: aligned FASTA output with the reference sequence included -->
        <test expect_num_outputs="2">
            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta" />
            <conditional name="db">
                <param name="source" value="download" />
            </conditional>
            <param name="outputs" value="report_tsv,output_fasta" />
            <param name="organism" value="sars-cov-2" />
            <conditional name="adv">
                <param name="advanced_options" value="yes" />
                <param name="include_reference" value="true" />
            </conditional>
            <output name="report_tsv">
                <assert_contents>
                    <has_n_columns n="66" />
                    <has_text text="mediocre" />
                </assert_contents>
            </output>
            <output name="output_fasta" value="output_alignment.fasta" ftype="fasta" />
        </test>
        <!-- Test 5: gzip-compressed monkeypox input; the report has 67 columns here -->
        <test expect_num_outputs="1">
            <param name="input_fasta" value="mpxv.fasta.gz" ftype="fasta.gz" />
            <conditional name="db">
                <param name="source" value="download" />
            </conditional>
            <param name="outputs" value="report_tsv" />
            <param name="organism" value="MPXV" />
            <output name="report_tsv">
                <assert_contents>
                    <has_n_columns n="67" />
                    <has_text text="hMPXV-1" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**What it does**

Nextclade_ assigns clades, calls mutations and performs sequence quality checks on viral genomes. This tool supports SARS-CoV-2, influenza and monkeypox virus sequences.

For a description of nextclade's configuration files, see the `nextclade documentation <https://docs.nextstrain.org/projects/nextclade/en/latest/index.html>`_.

**Input**

Input is a FASTA file (optionally gzip-compressed) containing one or more consensus genomes of the selected organism.

**Output**

Outputs can include:

* A tabular format file with a report, one line per input sequence

* A JSON format file with the same information as is present in the tabular report

* An Auspice v2 tree file in JSON format

* A FASTA format file with the aligned sequences, optionally including the reference sequence

.. _Nextclade: https://github.com/nextstrain/nextclade

    ]]></help>
    <expand macro="citations" />
</tool>