<tool id="snpfreqplot" name="Variant Frequency Plot" version="@VERSION@+galaxy@GALAXY_VERSION@" profile="20.09"
      license="GPL-3.0-or-later" >
    <description>Generates a heatmap of allele frequencies grouped by variant type for SnpEff-annotated SARS-CoV-2 data</description>
    <!-- Version tokens substituted into the version attribute of the tool element -->
    <macros>
        <token name="@VERSION@">1.0</token>
        <token name="@GALAXY_VERSION@">3</token>
    </macros>
    <!-- Pinned R runtime and R/Bioconductor packages -->
    <requirements>
        <requirement type="package" version="4.0">r-base</requirement>
        <requirement type="package" version="1.0.12">r-pheatmap</requirement>
        <requirement type="package" version="1.3.0">r-tidyverse</requirement>
        <requirement type="package" version="1.36.0">bioconductor-variantannotation</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
## Name of the plot file written to the job working directory.
## Note: within this template the local variable shadows the output
## dataset that is also called "outfile".
#set $outfile = "tmp_output." + str($advanced.output_type)

## Reading each bundled R script first makes the job stop early (non-zero
## exit code) if one of them is missing or unreadable.
cat '$__tool_directory__/helperFunctions.R' > /dev/null
&& cat '$__tool_directory__/snpEffExtract.R' > /dev/null
&& cat '$__tool_directory__/heatmap_for_variants.R' > /dev/null
&& echo "output file: $outfile"
&& Rscript '$configscript'
    ]]></command>
    <configfiles>
        <configfile name="configscript"><![CDATA[
## 1. Set Sample Inputs
##    ------------------
##    Create a dataframe of sample ids, filetypes, and filenames
##    from the input collection. At this point, the list could be
##    of mixed type (vcf and tabular), though maybe Galaxy
##    restricts that.
samples = list(ids = c(), exts= c(), files = c())
#for $i, $file in enumerate($sinputs):
samples\$ids = c(samples\$ids, '${file.element_identifier}')
samples\$exts = c(samples\$exts, '${file.extension}')
samples\$files = c(samples\$files, '${file}')
#end for
samples = data.frame(samples, stringsAsFactors=F)

## 2. Input Conversion (external script)
##    ----------------------------------
##    We source the input conversion script *after* the samples
##    have been populated, so that it performs an inplace replacement
##    of the vcf inputs with their converted tabular counterparts.
##    All samples are all tabular after this point
##    NOTE(review): assumes snpEffExtract.R is the conversion script and
##    that it relies on helperFunctions.R - confirm against the scripts.
source('$__tool_directory__/helperFunctions.R')
source('$__tool_directory__/snpEffExtract.R')

## 3. Galaxy Params
##    --------------
##    Set the general script parameters from the UI
variant_frequency <- as.numeric( '$varfreq' )
brewer_color_gene_annotation <- as.character( '$advanced.color' )

## The clustering method and cluster count only exist as tool parameters
## when clustering is enabled; the else branch assigns fixed values so the
## three R variables are always defined.
#if str($clustering.do) == "TRUE":
pheat_clustering <- TRUE
pheat_clustering_method <- as.character( '$clustering.method' )
pheat_number_of_clusters <- as.integer( '$clustering.nclust' )
#else
pheat_clustering <- FALSE
pheat_clustering_method <- "ward.D2"
pheat_number_of_clusters <- 5
#end if

ratio = as.numeric('$advanced.ratio')
out_ext = '$advanced.output_type'
out_file = paste0("tmp_output.", out_ext)

## 4. Generate Heatmap (external script)
##    ----------------------------------
##    NOTE(review): assumes heatmap_for_variants.R reads the variables
##    defined above and writes the plot to out_file - confirm.
source('$__tool_directory__/heatmap_for_variants.R')
        ]]></configfile>
    </configfiles>
    <inputs>
        <param name="sinputs" format="tabular,vcf" type="data" multiple="true"
            collection_type="list" label="Variant lists data"
            help="Select at least two datasets (or a dataset collection) with variant lists (see the tool help below for format details). Datasets are expected to represent individual samples and dataset names will be used as sample identifiers." />
        <param name="varfreq" type="float" min="0" max="1" value="0.1"
            label="Variant Frequency Threshold"
            help="Only plot variants with an intrasample frequency above this threshold in at least one sample." />
        <section name="advanced" title="Image Properties" expanded="true">
            <param name="output_type" type="select" label="Plot output format" >
                <option value="pdf" selected="true" >PDF</option>
                <option value="png" >PNG</option>
                <option value="svg">SVG</option>
                <option value="tiff" >TIFF</option>
                <option value="bmp" >BMP</option>
                <option value="jpeg" >JPEG</option>
            </param>
            <param name="ratio" label="Cell Ratio" type="float"
                min="0.05" value="0.67" max="20"
                help="Width:Height ratio of individual heatmap cells" />
            <param name="color" type="select" label="Color palette used for the gene annotations" >
                <option value="Set1" />
                <option value="Set2" />
                <option value="Set3" selected="true" />
                <option value="Pastel2" />
                <option value="Pastel1" />
                <option value="Paired" />
                <option value="Dark2" />
                <option value="Accent" />
                <option value="YlOrRd" />
                <option value="YlOrBr" />
                <option value="YlGnBu" />
                <option value="YlGn" />
                <option value="Reds" />
                <option value="RdPu" />
                <option value="Purples" />
                <option value="PuRd" />
                <option value="PuBuGn" />
                <option value="PuBu" />
                <option value="OrRd" />
                <option value="Oranges" />
                <option value="Greys" />
                <option value="Greens" />
                <option value="GnBu" />
                <option value="BuPu" />
                <option value="BuGn" />
                <option value="Blues" />
                <option value="Spectral" />
                <option value="RdYlGn" />
                <option value="RdYlBu" />
                <option value="RdGy" />
                <option value="RdBu" />
                <option value="PuOr" />
                <option value="PRGn" />
                <option value="PiYG" />
                <option value="BrBG" />
            </param>
        </section>
        <conditional name="clustering">
            <param name="do" type="select" label="Perform Clustering?" >
                <option value="TRUE">Yes</option>
                <option value="FALSE" selected="true">No</option>
            </param>
            <when value="TRUE" >
                <param name="nclust" type="integer"
                    min="1" value="1" label="Number of clusters" />
                <param name="method" type="select" label="Clustering method" >
                    <option value="ward.D" />
                    <option value="ward.D2" selected="true" />
                    <option value="single" />
                    <option value="complete" />
                    <option value="average" >average (UPGMA)</option>
                    <option value="mcquitty" >mcquitty (WPGMA)</option>
                    <option value="median" >median (WPGMC)</option>
                    <option value="centroid" >centroid (UPGMC)</option>
                </param>
            </when>
            <when value="FALSE" />
        </conditional>
    </inputs>
    <outputs>
        <!-- Declared as pdf by default; change_format switches the datatype
             to match the plot format selected in the advanced section -->
        <data name="outfile" format="pdf" from_work_dir="tmp_output.*"
            label="Variant-Frequency Plot on ${on_string}: ${advanced.output_type}">
            <change_format>
                <when input="advanced.output_type" value="svg" format="svg" />
                <when input="advanced.output_type" value="png" format="png" />
                <when input="advanced.output_type" value="tiff" format="tiff" />
                <when input="advanced.output_type" value="bmp" format="bmp" />
                <when input="advanced.output_type" value="jpeg" format="jpg" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <!-- PDF, tabular inputs -->
            <param name="sinputs" ftype="tabular" value="input436.tabular,input437.tabular,input438.tabular,input439.tabular,input440.tabular,input441.tabular,input442.tabular,input443.tabular,input444.tabular" />
            <output name="outfile" ftype="pdf" value="heatmap.default.pdf" compare="sim_size" delta="250" />
        </test>
        <test expect_num_outputs="1">
            <!-- PDF, tabular inputs, short color palette -->
            <param name="sinputs" ftype="tabular" value="input436.tabular,input437.tabular,input438.tabular,input439.tabular,input440.tabular,input441.tabular,input442.tabular,input443.tabular,input444.tabular" />
            <section name="advanced" >
                <param name="color" value="Set2" />
            </section>
            <output name="outfile" ftype="pdf" value="heatmap.default.pdf" compare="sim_size" delta="250" />
        </test>
        <test expect_num_outputs="1">
            <!-- PNG, multiple inputs, non-numeric IDS -->
            <param name="sinputs" ftype="tabular" value="input436.tabular,input437.tabular,input443.tabular,input444.tabular" />
            <param name="varfreq" value="0.5" />
            <section name="advanced" >
                <param name="color" value="Spectral" />
                <param name="output_type" value="png" />
            </section>
            <output name="outfile" ftype="png" value="heatmap.imageopts.png" compare="sim_size" delta="100000" />
        </test>
        <test expect_num_outputs="1">
            <!-- SVG, clustering defaults -->
            <param name="sinputs" ftype="tabular" value="input438.tabular,input439.tabular,input440.tabular,input441.tabular,input442.tabular" />
            <conditional name="clustering">
                <param name="do" value="TRUE" />
            </conditional>
            <section name="advanced" >
                <param name="color" value="Greys" />
                <param name="ratio" value="0.8" />
                <param name="output_type" value="svg" />
            </section>
            <output name="outfile" ftype="svg">
                <assert_contents>
                    <has_text text="viewBox=&quot;0 0 1156 380&quot;" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- JPEG, clustering extras, mixed alphanumeric labels -->
            <param name="sinputs" ftype="tabular" value="input436.tabular,input443.tabular,input438.tabular,input444.tabular" />
            <conditional name="clustering">
                <param name="do" value="TRUE" />
                <param name="nclust" value="2" />
                <param name="method" value="centroid" />
            </conditional>
            <section name="advanced" >
                <param name="color" value="Purples" />
                <param name="ratio" value="1.2" />
                <param name="output_type" value="jpeg" />
            </section>
            <output name="outfile" ftype="jpg" value="heatmap.clustering2.jpeg" compare="sim_size" delta="130000" />
        </test>
        <test expect_num_outputs="1">
            <!-- PDF, vcf test -->
            <param name="sinputs" ftype="vcf" value="snpeff.123.vcf,snpeff.456.vcf,snpeff.789.vcf" />
            <section name="advanced" >
                <param name="color" value="PuBuGn" />
                <param name="output_type" value="pdf" />
            </section>
            <output name="outfile" ftype="pdf" value="heatmap.from_vcf.pdf" compare="sim_size" delta="250" />
        </test>
        <test expect_num_outputs="1">
            <!-- SVG, problematic vcf test -->
            <param name="sinputs" ftype="vcf" value="1084592.vcf,1085080.vcf,1085445.vcf,1085841.vcf,1085990.vcf" />
            <section name="advanced" >
                <param name="output_type" value="svg" />
            </section>
            <output name="outfile" ftype="svg">
                <assert_contents>
                    <has_text text="viewBox=&quot;0 0 754 293&quot;" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- SVG, Vcf test with problematic splice+syn at snpeff789.vcf for threshold = 0.0222
            and an "empty" file (no variants) and a file with just one variant among its inputs -->
            <param name="sinputs" ftype="vcf" value="snpeff.123.vcf,snpeff.456.vcf,snpeff.789.vcf,no_variants.vcf,single_variant.vcf" />
            <param name="varfreq" value="0.0222" />
            <section name="advanced" >
                <param name="output_type" value="svg" />
            </section>
            <output name="outfile" ftype="svg">
                <assert_contents>
                    <has_text text="viewBox=&quot;0 0 1962 546&quot;" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool generates multi-sample variant-frequency plots from SnpEff-annotated
viral variant lists with optional hierarchical clustering of the samples.

.. class:: warningmark

    Currently, this tool has been tested only on SARS-CoV-2 variant data.
    While the intention is to have it work for viral variant data in general,
    be prepared for unexpected behavior with other input data at the current
    development stage.


The tool expects input variant lists in one of the following two formats:

1. VCF datasets as produced by standard variant callers with

   - variant allele frequencies encoded in an ``AF`` INFO field
   - variant functional genomic effects annotated using SnpEff's EFF format (SnpEff's ANN format is not currently supported!)

2. tabular datasets with columns listing, at least, the following variant properties:

   - ``CHROM``
   - ``POS``
   - ``REF``
   - ``ALT``
   - ``AF``
   - ``EFF[*].AA``
   - ``EFF[*].GENE``
   - ``EFF[*].EFFECT``

   Such files can be produced with SnpSift Extract Fields and can be useful if
   preprocessing of the lists with standard text processing tools is required.

   .. class:: infomark

   To represent empty EFF fields in the tabular format you can choose between
   ``.`` and the empty string.


Example output:

.. image:: /static/images/example_output.png

    ]]></help>
    <citations>
        <citation type="bibtex">@unpublished{Fuchs2020,
            author = {Fuchs, Jonas},
            title = {},
            year = {2020},
            note = {Multi-sample annotated viral variant-frequency plots based on the R pheatmap package.},
            address = {Institute for Virology, University of Freiburg}
        }</citation>
    </citations>
</tool>