diff snpfreqplot.xml @ 0:1062d6ad6503 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
author iuc
date Wed, 02 Dec 2020 21:23:06 +0000
parents
children e362b3143cde
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snpfreqplot.xml	Wed Dec 02 21:23:06 2020 +0000
@@ -0,0 +1,311 @@
+<tool id="snpfreqplot" name="Variant Frequency Plot" version="@VERSION@+galaxy@GALAXY_VERSION@" profile="20.09"
+      license="GPL-3.0-or-later" >
+    <description>Generates a heatmap of allele frequencies grouped by variant type for SnpEff-annotated SARS-CoV-2 data</description>
+    <macros>
+        <!-- Tokens substituted into the tool's version attribute,
+             which is written as "@VERSION@+galaxy@GALAXY_VERSION@". -->
+        <token name="@VERSION@">1.0</token>
+        <token name="@GALAXY_VERSION@">0</token>
+    </macros>
+    <requirements>
+        <!-- Packages needed at runtime: the command runs Rscript on a
+             generated R script that sources the tool's helper scripts. -->
+        <requirement type="package" version="4.0">r-base</requirement>
+        <requirement type="package" version="1.0.12">r-pheatmap</requirement>
+        <requirement type="package" version="1.3.0">r-tidyverse</requirement>
+        <requirement type="package" version="1.36.0">bioconductor-variantannotation</requirement>
+        <!-- NOTE(review): the version attribute is empty here, so unlike the
+             packages above this one is not pinned. Confirm whether a
+             specific version should be set. -->
+        <requirement type="package" version="">xorg-libxt</requirement>
+    </requirements>
+    <!-- EDAM ontology annotations attached to the tool. The meaning of the
+         individual IDs is not visible in this file; verify them against the
+         EDAM ontology before changing them. -->
+    <edam_topics>
+        <edam_topic>topic_0797</edam_topic>
+        <edam_topic>topic_0092</edam_topic>
+    </edam_topics>
+    <edam_operations>
+        <edam_operation>operation_3436</edam_operation>
+    </edam_operations>
+    <command detect_errors="exit_code"><![CDATA[
+## Name of the plot file in the job working directory. It is only used
+## for the log line echoed below; the R config script builds the same
+## name on its own from the selected output type.
+#set $plot_name = "tmp_output.%s" % str($advanced.output_type)
+
+## Read each helper R script once (output discarded), log the expected
+## output name, then run the rendered config script, which sources the
+## helper scripts itself.
+cat '$__tool_directory__/helperFunctions.R' > /dev/null
+&& cat '$__tool_directory__/snpEffExtract.R' > /dev/null
+&& cat '$__tool_directory__/heatmap_for_variants.R' > /dev/null
+&& echo "output file: $plot_name"
+&& Rscript '$configscript'
+    ]]>
+   </command>
+    <configfiles>
+        <configfile name="configscript"><![CDATA[
+## This Cheetah template renders the R driver script that the command
+## section runs with Rscript. Lines starting with two hash signs are
+## template comments and do not appear in the rendered script.
+##
+## 1. Set Sample Inputs
+##    ------------------
+##    Create a dataframe of sample ids, filetypes, and filenames
+##    from the input collection. At this point, the list could be
+##    of mixed type (vcf and tabular), though maybe Galaxy
+##    restricts that.
+##
+##    Sample ids come from user-chosen dataset names, so backslashes
+##    and single quotes are escaped before an id is embedded in a
+##    single-quoted R string literal. Without this, a name containing
+##    either character would break the generated script or inject
+##    R code into it.
+samples = list(ids = c(), exts= c(), files = c())
+#for $file in $sinputs:
+#set $sample_id = str($file.element_identifier).replace("\\", "\\\\").replace("'", "\\'")
+samples\$ids = c(samples\$ids, '${sample_id}')
+samples\$exts = c(samples\$exts, '${file.extension}')
+samples\$files = c(samples\$files, '${file}')
+#end for
+samples = data.frame(samples, stringsAsFactors=F)
+
+## 2. Input Conversion (external script)
+##    ----------------------------------
+##    We source the input conversion script *after* the samples
+##    have been populated, so that it performs an inplace replacement
+##    of the vcf inputs with their converted tabular counterparts.
+##
+##    All samples are all tabular after this point
+source('$__tool_directory__/helperFunctions.R')
+source('$__tool_directory__/snpEffExtract.R')
+
+## 3. Galaxy Params
+##    --------------
+##    Set the general script parameters from the UI
+variant_frequency <- as.numeric( '$varfreq' )
+brewer_color_gene_annotation <- as.character( '$advanced.color' )
+
+## When clustering is switched off, the method and cluster count are
+## still defined (with fixed fallback values) so that the variables
+## always exist for the heatmap script.
+#if str($clustering.do) == "TRUE":
+pheat_clustering <- TRUE
+pheat_clustering_method <- as.character( '$clustering.method' )
+pheat_number_of_clusters <- as.integer( '$clustering.nclust' )
+#else
+pheat_clustering <- FALSE
+pheat_clustering_method <- "ward.D2"
+pheat_number_of_clusters <- 5
+#end if
+
+ratio = as.numeric('$advanced.ratio')
+out_ext = '$advanced.output_type'
+out_file = paste0("tmp_output.", out_ext)
+
+## 4. Generate Heatmap (external script)
+##    ----------------------------------
+source('$__tool_directory__/heatmap_for_variants.R')
+
+]]>
+        </configfile>
+    </configfiles>
+    <inputs>
+        <!-- Sample datasets (tabular or VCF). The config script iterates over
+             them and records each dataset's identifier, extension and path.
+             NOTE(review): the help text asks for at least two datasets, but no
+             min attribute enforces that; confirm whether min="2" is intended.
+             NOTE(review): collection_type is set on a type="data" parameter;
+             verify that Galaxy honours it for this parameter type. -->
+        <param name="sinputs" format="tabular,vcf" type="data" multiple="true"
+            collection_type="list" label="Variant lists data"
+            help="Select at least two datasets (or a dataset collection) with variant lists (see the tool help below for format details). Datasets are expected to represent individual samples and dataset names will be used as sample identifiers." />
+        <!-- Passed to the R script as variant_frequency. -->
+        <param name="varfreq" type="float" min="0" max="1" value="0.1"
+            label="Variant Frequency Threshold"
+            help="Only plot variants with an intrasample frequency above this threshold in at least one sample." />
+        <!-- Plot appearance: output image format, cell aspect ratio and the
+             palette used for gene annotations. -->
+        <section name="advanced" title="Image Properties" expanded="true">
+            <!-- The selected value becomes the extension of the plot file
+                 (tmp_output.EXT) and selects the output dataset format. -->
+            <param name="output_type" type="select" label="Plot output format" >
+                <option value="pdf" selected="true" >PDF</option>
+                <option value="png" >PNG</option>
+                <option value="svg">SVG</option>
+                <option value="tiff" >TIFF</option>
+                <option value="bmp" >BMP</option>
+                <option value="jpeg" >JPEG</option>
+            </param>
+            <param name="ratio" label="Cell Ratio" type="float"
+                min="0.05" value="0.67" max="20"
+                help="Width:Height ratio of individual heatmap cells" />
+            <!-- The chosen palette name is passed to the R script as
+                 brewer_color_gene_annotation. -->
+            <param name="color" type="select" label="Color palette used for the gene annotations" >
+                <option value="Set1" />
+                <option value="Set2" />
+                <option value="Set3" selected="true" />
+                <option value="Pastel2" />
+                <option value="Pastel1" />
+                <option value="Paired" />
+                <option value="Dark2" />
+                <option value="Accent" />
+                <option value="YlOrRd" />
+                <option value="YlOrBr" />
+                <option value="YlGnBu" />
+                <option value="YlGn" />
+                <option value="Reds" />
+                <option value="RdPu" />
+                <option value="Purples" />
+                <option value="PuRd" />
+                <option value="PuBuGn" />
+                <option value="PuBu" />
+                <option value="OrRd" />
+                <option value="Oranges" />
+                <option value="Greys" />
+                <option value="Greens" />
+                <option value="GnBu" />
+                <option value="BuPu" />
+                <option value="BuGn" />
+                <option value="Blues" />
+                <option value="Spectral" />
+                <option value="RdYlGn" />
+                <option value="RdYlBu" />
+                <option value="RdGy" />
+                <option value="RdBu" />
+                <option value="PuOr" />
+                <option value="PRGn" />
+                <option value="PiYG" />
+                <option value="BrBG" />
+            </param>
+        </section>
+        <!-- Optional clustering. With "No" selected, the config script sets
+             pheat_clustering to FALSE and uses fixed fallback values for the
+             method (ward.D2) and the number of clusters (5). -->
+        <conditional name="clustering">
+            <param name="do" type="select" label="Perform Clustering?" >
+                <option value="TRUE">Yes</option>
+                <option value="FALSE" selected="true">No</option>
+            </param>
+            <when value="TRUE" >
+                <param name="nclust" type="integer"
+                    min="1" value="1" label="Number of clusters" />
+                <param name="method" type="select" label="Clustering method" >
+                    <option value="ward.D" />
+                    <option value="ward.D2" selected="true" />
+                    <option value="single" />
+                    <option value="complete" />
+                    <option value="average" >average (UPGMA)</option>
+                    <option value="mcquitty" >mcquitty (WPGMA)</option>
+                    <option value="median" >median (WPGMC)</option>
+                    <option value="centroid" >centroid (UPGMC)</option>
+                </param>
+            </when>
+            <when value="FALSE" />
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Single plot dataset. The R config script names the plot file
+             tmp_output.EXT, where EXT is the selected output type, and the
+             pattern in from_work_dir is meant to pick that file up. The
+             dataset format defaults to pdf and is switched below for every
+             other output type. -->
+        <data
+            name="outfile"
+            format="pdf"
+            from_work_dir="tmp_output.*"
+            label="Variant-Frequency Plot on ${on_string}: ${advanced.output_type}">
+            <change_format>
+                <when format="svg" input="advanced.output_type" value="svg"/>
+                <when format="png" input="advanced.output_type" value="png"/>
+                <when format="tiff" input="advanced.output_type" value="tiff"/>
+                <when format="bmp" input="advanced.output_type" value="bmp"/>
+                <when format="jpg" input="advanced.output_type" value="jpeg"/>
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Comparison strategy used below: binary image outputs (PDF, PNG,
+             JPEG) are compared to a reference file by approximate size
+             (sim_size with a per-test delta), while SVG outputs are checked
+             for an expected viewBox attribute in the generated markup. -->
+        <test expect_num_outputs="1">
+            <!-- PDF, tabular inputs -->
+            <param name="sinputs" ftype="tabular" value="input436.tabular,input437.tabular,input438.tabular,input439.tabular,input440.tabular,input441.tabular,input442.tabular,input443.tabular,input444.tabular" />
+            <output name="outfile" ftype="pdf" value="heatmap.default.pdf" compare="sim_size" delta="250" />
+        </test>
+        <test expect_num_outputs="1">
+            <!-- PNG, multiple inputs, non-numeric IDS -->
+            <param name="sinputs" ftype="tabular" value="input436.tabular,input437.tabular,input443.tabular,input444.tabular" />
+            <param name="varfreq" value="0.5" />
+            <section name="advanced" >
+                <param name="color" value="Spectral" />
+                <param name="output_type" value="png" />
+            </section>
+            <output name="outfile" ftype="png" value="heatmap.imageopts.png" compare="sim_size" delta="86000" />
+        </test>
+        <test expect_num_outputs="1">
+            <!-- SVG, clustering defaults -->
+            <param name="sinputs" ftype="tabular" value="input438.tabular,input439.tabular,input440.tabular,input441.tabular,input442.tabular" />
+            <conditional name="clustering">
+                <param name="do" value="TRUE" />
+            </conditional>
+            <section name="advanced" >
+                <param name="color" value="Greys" />
+                <param name="ratio" value="0.8" />
+                <param name="output_type" value="svg" />
+            </section>
+            <output name="outfile" ftype="svg">
+                <assert_contents>
+                    <has_text text="viewBox=&quot;0 0 1156 335&quot;" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <!-- JPEG, clustering extras, mixed alphanumeric labels -->
+            <param name="sinputs" ftype="tabular" value="input436.tabular,input443.tabular,input438.tabular,input444.tabular" />
+            <conditional name="clustering">
+                <param name="do" value="TRUE" />
+                <param name="nclust" value="2" />
+                <param name="method" value="centroid" />
+            </conditional>
+            <section name="advanced" >
+                <param name="color" value="Purples" />
+                <param name="ratio" value="1.2" />
+                <param name="output_type" value="jpeg" />
+            </section>
+            <output name="outfile" ftype="jpg" value="heatmap.clustering2.jpeg" compare="sim_size" delta="121000" />
+        </test>
+        <test expect_num_outputs="1">
+            <!-- PDF, vcf test -->
+            <param name="sinputs" ftype="vcf" value="snpeff.123.vcf,snpeff.456.vcf,snpeff.789.vcf" />
+            <section name="advanced" >
+                <param name="color" value="PuBuGn" />
+                <param name="output_type" value="pdf" />
+            </section>
+            <output name="outfile" ftype="pdf" value="heatmap.from_vcf.pdf" compare="sim_size" delta="250" />
+        </test>
+        <test expect_num_outputs="1">
+            <!-- SVG, problematic vcf test -->
+            <param name="sinputs" ftype="vcf" value="1084592.vcf,1085080.vcf,1085445.vcf,1085841.vcf,1085990.vcf" />
+            <section name="advanced" >
+                <param name="output_type" value="svg" />
+            </section>
+            <output name="outfile" ftype="svg">
+                <assert_contents>
+                    <has_text text="viewBox=&quot;0 0 754 271&quot;" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <!-- SVG, Vcf test with problematic splice+syn at snpeff789.vcf for threshold = 0.0222 -->
+            <param name="sinputs" ftype="vcf" value="snpeff.123.vcf,snpeff.456.vcf,snpeff.789.vcf" />
+            <param name="varfreq" value="0.0222" />
+            <section name="advanced" >
+                <param name="output_type" value="svg" />
+            </section>
+            <output name="outfile" ftype="svg">
+                <assert_contents>
+                    <has_text text="viewBox=&quot;0 0 3101 697&quot;" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+This tool generates multi-sample variant-frequency plots from SnpEff-annotated
+viral variant lists with optional hierarchical clustering of the samples.
+
+.. class:: warningmark
+
+    Currently, this tool has been tested only on SARS-CoV-2 variant data.
+    While the intention is to have it work for viral variant data in general,
+    be prepared for unexpected behavior with other input data at the current
+    development stage.
+
+----
+
+The tool expects input variant lists in one of the following two formats:
+
+1. VCF datasets as produced by standard variant callers with
+
+   - variant allele frequencies encoded in an ``AF`` INFO field
+   - variant functional genomic effects annotated using SnpEff's EFF format (SnpEff's ANN format is not currently supported!)
+
+2. tabular datasets with columns listing, at least, the following variant properties:
+
+   - ``CHROM``
+   - ``POS``
+   - ``REF``
+   - ``ALT``
+   - ``AF``
+   - ``EFF[*].AA``
+   - ``EFF[*].GENE``
+   - ``EFF[*].EFFECT``
+
+   Such files can be produced with SnpSift Extract Fields and can be useful if
+   preprocessing of the lists with standard text processing tools is required.
+
+----
+
+Example output:
+
+.. image:: /static/images/example_output.png
+
+    ]]></help>
+    <citations>
+        <!-- NOTE(review): the BibTeX title field below is empty, and the
+             descriptive text currently sits in the note field. Confirm the
+             intended title with the author. -->
+        <citation type="bibtex">@unpublished{Fuchs2020,
+            author = {Fuchs, Jonas},
+            title = {},
+            year = {2020},
+            note = {Multi-sample annotated viral variant-frequency plots based on the R pheatmap package.},
+            address = {Institute for Virology, University of Freiburg}
+        }</citation>
+    </citations>
+</tool>