view lineagespot_wrapper.xml @ 1:99494998688a draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/lineagespot commit 6a6a37f2574954dae65f9ec407fe38594ed37659
author iuc
date Sun, 25 Feb 2024 09:49:20 +0000
parents 6ddf5a9ce4a5
children
line wrap: on
line source

<tool id="lineagespot" name="lineagespot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.09">
    <description>identifies SARS-CoV-2 lineages contributing to metagenomic samples from per-sample variant files</description>
    <macros>
        <!-- @TOOL_VERSION@ is used twice: in the tool version attribute above and to pin
             the bioconductor-lineagespot requirement below.
             @VERSION_SUFFIX@ is the part appended after "+galaxy" in the tool version. -->
        <token name="@TOOL_VERSION@">1.6.0</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <xrefs>
        <xref type="bio.tools">lineagespot</xref>
        <xref type="bioconductor">lineagespot</xref>
    </xrefs>
    <requirements>
        <!-- The command section calls Rscript (lineagespot_verbose.R) and
             python (convert_lineage_defs.py), hence both interpreters are required.
             NOTE(review): r-getopt is presumably what lineagespot_verbose.R uses to
             parse its long options; confirm against the script. -->
        <requirement type="package" version="4.3.1">r-base</requirement>
        <requirement type="package" version="1.20.4">r-getopt</requirement>
        <requirement type="package" version="@TOOL_VERSION@">bioconductor-lineagespot</requirement>
        <requirement type="package" version="3.11">python</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
## Set up refs/, the folder handed to lineagespot as its lineage definitions.
## For cached constellations data the definitions are converted right here;
## custom definitions get linked into refs/ by arrange_custom_inputs.sh below.
mkdir refs &&
#if str($voc_source.choice) == 'cached':
    python '${__tool_directory__}/convert_lineage_defs.py'
        -i '${voc_source.constellations.fields.path}/definitions'
        -o refs &&
#end if

## Run the generated config script: it symlinks the VCFs of all samples
## into vcfs/ and all custom lineage definitions into refs/
sh arrange_custom_inputs.sh &&

#if str($ann_data.choice) == 'custom':
    ## lineagespot only accepts annotation files carrying a .gff3 suffix,
    ## so expose the user-provided dataset under such a name
    ln -s '$ann_data.in_gff3' custom.gff3 &&
#end if

Rscript '${__tool_directory__}/lineagespot_verbose.R'
    --in_vcf vcfs/
    --in_ref refs/
    #if str($ann_data.choice) == 'custom':
    --in_gff3 custom.gff3
    #else:
    --in_gff3 '${__tool_directory__}/NC_045512.2_annot.gff3'
    #end if
    --in_threshold $in_threshold
    ]]></command>
    <configfiles>
        <!-- Shell script executed by the command section as "sh arrange_custom_inputs.sh".
             It symlinks every input VCF into vcfs/ and, when custom lineage definitions
             are selected, every collection element into refs/ (created by the command
             section beforehand). One status line per step is echoed to stdout. -->
        <configfile filename="arrange_custom_inputs.sh"><![CDATA[set -e
## "set -e" plus one command per line: the script stops with a non-zero exit
## status at the first failing mkdir/ln, so a failure (e.g. two inputs mapping to
## the same link name) can never be hidden by a later, successful command.
mkdir vcfs
#for $vcf in $in_vcf:
  ## single quotes would break the shell quoting, slashes the link path
  #set $sample_name = $vcf.element_identifier.replace("'", '_').replace('/', '_')
ln -s '$vcf' 'vcfs/${sample_name}.vcf'
#end for
#set $num_samples = len($in_vcf)
echo "Gathered $num_samples samples for lineagespot run."
#if str($voc_source.choice) == 'custom':
  ## count once, outside the loop, so the variable is defined even for an empty collection
  #set $num_lineages = len(list($voc_source.collection))
  #for $lineage_def in $voc_source.collection:
    #set $lineage_name = $lineage_def.element_identifier.replace("'", '_').replace('/', '_')
ln -s '$lineage_def' 'refs/${lineage_name}.txt'
  #end for
echo "Gathered $num_lineages custom lineage definitions for the analysis."
#end if]]></configfile>
    </configfiles>
    <inputs>
        <!-- One or more VCF datasets; arrange_custom_inputs.sh links each of them into
             vcfs/ under a name derived from its element identifier. -->
        <param type="data" name="in_vcf" format="vcf" multiple="true" label="Per-sample variant calling data" />
        <!-- Source of the lineage definitions placed in refs/:
             "custom" links the elements of a history collection,
             "cached" converts a constellations release with convert_lineage_defs.py. -->
        <conditional name="voc_source">
            <param name="choice" type="select"
                   label="Source of lineage definitions"
                   help="Lineagespot detects lineage evidence based on lineage mutation patterns defined in tool-specific definition files. The Galaxy tool also has experimental support for reading lineage definitions from cached constellations data (see tool help below).">
                <option value="custom">Collection of lineagespot-specific definitions in history</option>
                <option value="cached">Lineage definitions from cached constellations data</option>
            </param>
            <when value="custom">
                <param name="collection" type="data_collection" collection_type="list" format="tabular" label="Collection of lineage definitions" />
            </when>
            <when value="cached">
                <!-- The command section reads the "path" field of the selected data table
                     entry. Entries are sorted on column 3, which is declared as the
                     "date" column below. -->
                <param name="constellations" label="Cached constellations release" type="select">
                    <options from_data_table="pangolin_constellations">
                        <column name="value" index="0" />
                        <column name="description" index="1" />
                        <column name="date" index="3" />
                        <column name="path" index="4" />
                        <filter type="sort_by" column="3" />
                        <validator type="no_options" message="No cached constellations release available" />
                    </options>
                </param>
            </when>
        </conditional>
        <!-- Annotation source: "standard" passes the NC_045512.2_annot.gff3 file from the
             tool directory, "custom" links the selected dataset as custom.gff3. -->
        <conditional name="ann_data">
            <param name="choice" type="select" label="Source of genome feature annotations" help="Select built-in genome file to use built-in annotations for the SARS-CoV-2 reference sequence NC_045512.2.">
                <option value="standard">Use built-in genome file</option>
                <option value="custom">Provide custom genome file</option>
            </param>
            <when value="standard" />
            <when value="custom">
                <param name="in_gff3" type="data" format="gff3" label="Genes GFF" help="GFF3 input listing gene positions on the reference sequence" />
            </when>
        </conditional>
        <!-- Handed unchanged to lineagespot_verbose.R as its in_threshold option. -->
        <param type="float" name="in_threshold" value="0.8" label="AF threshold for identifying variants per sample" help="Variants in the input VCFs with an allele frequency less than the threshold will be ignored as noise in the lineage analysis."/>
    </inputs>
    <outputs>
        <!-- All three outputs are collected from fixed file names in the job working directory.
             NOTE(review): these files are presumably written by lineagespot_verbose.R; confirm against the script. -->
        <data name="lineage_hits" format="tabular" from_work_dir="lineage_hits.txt" label="${tool.name} on ${on_string}: Lineage Hits"/>
        <data name="lineage_report" format="tabular" from_work_dir="lineage_report.txt" label="${tool.name} on ${on_string}: Lineage Report"/>
        <data name="variants_table" format="tabular" from_work_dir="variants_table.txt" label="${tool.name} on ${on_string}: Variants Table"/>
    </outputs>
    <tests>
        <!-- Both tests run on the same three sample VCFs and only assert the number of
             lines of each output. -->
        <!-- Test 1: custom genome annotation (same gff3 file name as the built-in one)
             and custom lineage definitions given as a four-element list collection -->
        <test expect_num_outputs="3">
            <param name="in_vcf" value="SampleA_freebayes_ann.vcf,SampleB_freebayes_ann.vcf,SampleC_freebayes_ann.vcf"/>
            <conditional name="ann_data">
                <param name="choice" value="custom"/>
                <param name="in_gff3" value="NC_045512.2_annot.gff3"/>
            </conditional>
            <conditional name="voc_source">
                <param name="choice" value="custom"/>
                <param name="collection">
                    <collection type="list">
                        <element name="AY.1" ftype="tabular" value="AY.1.txt"/>
                        <element name="B.1.617.2" ftype="tabular" value="B.1.617.2.txt"/>
                        <element name="B.1.351" ftype="tabular" value="B.1.351.txt"/>
                        <element name="P.1" ftype="tabular" value="P.1.txt"/>
                    </collection>
                </param>
            </conditional>
            <output name="lineage_hits">
                <assert_contents>
                    <has_n_lines n="208"/>
                </assert_contents>
            </output>
            <output name="lineage_report">
                <assert_contents>
                    <has_n_lines n="13"/>
                </assert_contents>
            </output>
            <output name="variants_table">
                <assert_contents>
                    <has_n_lines n="1829"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: built-in genome annotation and lineage definitions converted from the
             cached constellations release v0.1.12; the variants table is expected to have
             the same number of lines as in test 1 -->
        <test expect_num_outputs="3">
            <param name="in_vcf" value="SampleA_freebayes_ann.vcf,SampleB_freebayes_ann.vcf,SampleC_freebayes_ann.vcf"/>
            <conditional name="ann_data">
                <param name="choice" value="standard"/>
            </conditional>
            <conditional name="voc_source">
                <param name="choice" value="cached"/>
                <param name="constellations" value="v0.1.12"/>
            </conditional>
            <output name="lineage_hits">
                <assert_contents>
                    <has_n_lines n="123"/>
                </assert_contents>
            </output>
            <output name="lineage_report">
                <assert_contents>
                    <has_n_lines n="10"/>
                </assert_contents>
            </output>
            <output name="variants_table">
                <assert_contents>
                    <has_n_lines n="1829"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**lineagespot** - Detection of SARS-CoV-2 lineages in wastewater samples using next-generation sequencing

The Galaxy tool wraps the functionality of the `lineagespot Bioconductor package <https://doi.org/doi:10.18129/B9.bioc.lineagespot>`__.

**Inputs**

*VCF datasets with per-sample variant calls*

The tool accepts any number of input datasets in VCF format, for which it will generate reports of SARS-CoV-2 lineage evidence. These datasets need to be annotated using the VCF annotation standard field 'ANN' as added, for example, by SnpEff.

*Lineage definitions*

The tool requires definitions of mutation profiles for lineages of interest (it cannot find evidence for lineages it does not know about). These can be provided as a collection of simple two-column tabular datasets, like this definition for lineage B.1.617.2::

    gene    amino acid
    ORF1b   P314L
    ORF1b   G662S
    ORF1b   P1000L
    S       T19R
    S       G142D
    S       E156G
    S       del157/158
    S       L452R
    S       T478K
    S       D614G
    S       P681R
    S       D950N
    ORF3a   S26L
    M       I82T
    ORF7a   V82A
    ORF7a   T120I
    ORF8    D119I
    ORF8    del120/121
    N       D63G
    N       R203M
    N       D377Y

where the gene names should match those used by the upstream tool producing the VCF ANN field.

Alternatively, lineage definitions can be extracted from `constellations <https://github.com/cov-lineages/constellations>`__ data cached on the Galaxy server.

.. class:: warningmark

    Please note that extraction of lineage definitions from constellations data is still **experimental**.
    The conversion process may drop some lineage defining mutations and shouldn't be trusted blindly.
    For full and up to date details see the `comments in the conversion script <https://github.com/search?q=repo%3Agalaxyproject%2Ftools-iuc+path%3A%2F%5Etools%5C%2Flineagespot%5C%2F%2F+convert_lineage_defs.py&type=code>`__.

*Genome feature annotations*

To learn about the position of the genes mentioned in VCF ANN fields and the lineage definitions on the genome, the tool requires an additional genome feature annotation input in gff3 format.
You should normally just use the offered built-in genome annotation file.
A custom annotation file would only be required if you've mapped your sequencing data to a different reference genome than NC_045512.2 that isn't positionally identical to it.
    ]]></help>
    <citations>
        <!-- Publication to cite for this tool, referenced by DOI -->
        <citation type="doi">10.1038/s41598-022-06625-6</citation>
    </citations>
</tool>