view freyja_demix.xml @ 2:53b22b7dff19 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/freyja commit dcb5fdc6a89563af33a1783e4f70754d6611585c
author iuc
date Thu, 10 Oct 2024 18:28:48 +0000
parents c0a0e79d7196
children
line wrap: on
line source

<tool id="freyja_demix" name="Freyja: Demix" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
      profile="@PROFILE@">
    <!-- Short description of what the tool reports -->
    <description>lineage abundances</description>
    <!-- Macros and tokens used throughout this wrapper are imported from macros.xml -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <!-- sed is requested in addition to whatever the requirements macro provides;
         the command section uses it to rewrite the sample name in the result file -->
    <expand macro="requirements">
        <requirement type="package" version="4.7">sed</requirement>
    </expand>
    <expand macro="version"/>
    <command detect_errors="exit_code"><![CDATA[
#import re
@RUN_FREYJA_UPDATE_COMMAND@
## Sample name: the dataset / collection element name, or the text typed by the user.
#if str($sample_name.source) == 'auto':
    #set $sn = str($variants_in.element_identifier)
#else:
    #set $sn = str($sample_name.name)
#end if
## The input is linked under a name derived from the sample name so that the
## name shows up in the demix output. Only characters that are safe in a shell
## word and in a sed pattern are kept; the exact sample name is restored by
## the sed call at the end.
#set $in_file = re.sub('[^A-Za-z0-9_.-]', '_', $sn) + '.' + str($variants_in.ext)
## After sanitizing, the dot is the only regex metacharacter left in the file name.
#set $sed_pattern = $in_file.replace('.', '\\.')
## Escape what sed treats specially in a replacement (backslash, delimiter, ampersand),
## then make single quotes survive the surrounding shell quoting.
#set $sed_replacement = $sn.replace('\\', '\\\\').replace('/', '\\/').replace('&', '\\&').replace("'", "'\\''")
ln -s '$variants_in' '$in_file' &&
freyja demix
    '$in_file'
    '$depth_file'
    #if $eps
        --eps $eps
    #end if
    #if $meta
        --meta '$meta'
    #end if
    $confirmedonly
    $wgisaid
    @CUSTOM_BARCODES_COMMAND@
    --covcut $depth_cutoff
    --output abundances_raw.tsv &&
sed 's/$sed_pattern/$sed_replacement/' abundances_raw.tsv > abundances.tsv
    ]]></command>
    <inputs>
        <!-- Variant calls to be demixed -->
        <param name="variants_in" type="data" format="tabular"
               label="Dataset with input variant calls"
               help="This can be a VCF dataset, or the tabular calls output of Freyja call or ivar variants."/>
        <!-- How the sample name written to the result is chosen:
             taken from the input dataset / element name, or typed in by the user -->
        <conditional name="sample_name">
            <param name="source" type="select" label="Set sample name"
                   help="Select autodetect to have the dataset or collection element name used as the sample name, or, for a single input dataset, provide an explicit sample name.">
                <option value="auto">Autodetect sample name</option>
                <option value="manual">Specify sample name explicitly</option>
            </param>
            <when value="auto"/>
            <when value="manual">
                <param name="name" type="text" label="Name of the sample"/>
            </when>
        </conditional>
        <!-- Remaining demixing parameters come from the shared macro
             (presumably including the depth file and the optional flags used in the command) -->
        <expand macro="demixing_common_options"/>
        <!-- Single integer passed to covcut; integer params do not take multiple values -->
        <param name="depth_cutoff" argument="--covcut" type="integer" min="0" value="10"
               label="Depth cutoff for coverage estimate"
               help="In the result file the coverage value will provide the 10x coverage estimate (percent of sites with 10 or greater reads). 10 is the default but can be modified in this field."/>
    </inputs>
    <outputs>
        <!-- Tabular demixing report, picked up from abundances.tsv in the job working directory -->
        <data name="abundances"
              format="tabular"
              from_work_dir="abundances.tsv"
              label="${tool.name} on ${on_string}: Lineages abundances summary"/>
    </outputs>
    <tests>
        <!-- Test 01: manual name sample; the explicit name must appear in the report -->
        <test expect_num_outputs="1">
            <param name="variants_in" value="variants.tsv"/>
            <param name="depth_file" value="depths.tsv"/>
            <conditional name="usher_update_option">
                <param name="choice" value="repo"/>
            </conditional>
            <conditional name="sample_name">
                <param name="source" value="manual"/>
                <param name="name" value="samplename"/>
            </conditional>
            <output name="abundances">
                <assert_contents>
                    <has_n_columns n="2" />
                    <has_text text="samplename"/>
                    <has_text text="AY.48"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 02: autoname sample; the input dataset name must appear in the report -->
        <test expect_num_outputs="1">
            <param name="variants_in" value="variants.tsv"/>
            <param name="depth_file" value="depths.tsv"/>
            <conditional name="usher_update_option">
                <param name="choice" value="repo"/>
            </conditional>
            <conditional name="sample_name">
                <param name="source" value="auto"/>
            </conditional>
            <output name="abundances">
                <assert_contents>
                    <has_n_columns n="2" />
                    <has_text text="variants.tsv"/>
                    <has_text text="AY.48"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

**Freyja demix** estimates lineage abundances in a potentially multi-lineage input sample.

Inputs
======

The tool requires as input a dataset with called variants and a dataset with genome-wide sequencing depth information.
Both types of data can be produced with *Freyja call*, but the tool accepts variant calls also in VCF format.

Note
----

For single samples it is recommended to select "Specify sample name explicitly"
under "Set sample name".

To use this tool on multiple samples in parallel, please provide two
collections in the same sample sort order - one with the variant calls, the
other one with the sequencing depths - and select "Autodetect sample name",
which will use collection element identifiers as the names of the samples.
This will produce a new collection of demixing reports that can be passed to
*Freyja: Aggregate and visualize* with sample names preserved.

Selection of multiple regular called variants and depth datasets is discouraged
since proper dataset pairing cannot be guaranteed!

Outputs
=======

The tool produces tabular output that includes the lineages detected in the sample, their corresponding abundances, and a lineage summary by constellation.

Example output:

========== ===================================================
           filename
summarized [('Delta', 0.65), ('Other', 0.25), ('Alpha', 0.1)]
lineages   ['B.1.617.2' 'B.1.2' 'AY.6' 'Q.3']
abundances "[0.5 0.25 0.15 0.1]"
resid      3.14159
coverage   95.8
========== ===================================================


Where *summarized* denotes a sum of all lineage abundances in a particular WHO designation (i.e. B.1.617.2 and AY.6 abundances are summed in the above example), otherwise they are grouped into "Other". The *lineages* array lists the identified lineages in descending order, and *abundances* contains the corresponding abundance estimates. The value of *resid* corresponds to the residual of the weighted least absolute deviation problem used to estimate lineage abundances. The *coverage* value provides the 10x coverage estimate (percent of sites with 10 or greater reads; 10 is the default but can be modified using the *--covcut* option).
    ]]></help>
    <!-- Citation entries are supplied by the "citations" macro (presumably defined in the imported macros.xml) -->
    <expand macro="citations"/>
</tool>