Mercurial > repos > iuc > snpfreqplot
diff snpfreqplot.xml @ 0:1062d6ad6503 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
author | iuc |
---|---|
date | Wed, 02 Dec 2020 21:23:06 +0000 |
parents | |
children | e362b3143cde |
line wrap: on
line diff
<!--
    Galaxy tool wrapper "Variant Frequency Plot" (snpfreqplot.xml).

    Recovered from the changeset diff that added this file as new
    (/dev/null to b/snpfreqplot.xml, hunk -0,0 +1,311); the leading "+"
    diff markers are not part of the tool definition.

    Flow, as visible in this file:
      1. <command> runs Rscript on the generated config script.
      2. The config script collects the selected inputs into a data frame,
         sources helperFunctions.R and snpEffExtract.R, sets the plot
         parameters from the form, then sources heatmap_for_variants.R.
      3. The output dataset is picked up from the working directory as
         tmp_output.<format>.
-->
<tool id="snpfreqplot"
      name="Variant Frequency Plot"
      version="@VERSION@+galaxy@GALAXY_VERSION@"
      profile="20.09"
      license="GPL-3.0-or-later">
    <description>Generates a heatmap of allele frequencies grouped by variant type for SnpEff-annotated SARS-CoV-2 data</description>
    <macros>
        <!-- Token pair substituted into the version attribute above (1.0+galaxy0). -->
        <token name="@VERSION@">1.0</token>
        <token name="@GALAXY_VERSION@">0</token>
    </macros>
    <requirements>
        <requirement type="package" version="4.0">r-base</requirement>
        <requirement type="package" version="1.0.12">r-pheatmap</requirement>
        <requirement type="package" version="1.3.0">r-tidyverse</requirement>
        <requirement type="package" version="1.36.0">bioconductor-variantannotation</requirement>
        <!-- NOTE(review): the version is empty, so this package is not pinned;
             confirm whether a fixed version should be recorded here. -->
        <requirement type="package" version="">xorg-libxt</requirement>
    </requirements>
    <edam_topics>
        <edam_topic>topic_0797</edam_topic>
        <edam_topic>topic_0092</edam_topic>
    </edam_topics>
    <edam_operations>
        <edam_operation>operation_3436</edam_operation>
    </edam_operations>
    <command detect_errors="exit_code"><![CDATA[
## Name of the plot file; the same name is rebuilt as out_file in the config script.
#set $outfile = "tmp_output." + str($advanced.output_type)

## Each cat reads one bundled R script and discards the content. A missing
## script makes cat exit non-zero, which stops the chain before Rscript runs.
## NOTE(review): presumably that early failure is the purpose of these reads; confirm.
cat '$__tool_directory__/helperFunctions.R' > /dev/null
&& cat '$__tool_directory__/snpEffExtract.R' > /dev/null
&& cat '$__tool_directory__/heatmap_for_variants.R' > /dev/null
&& echo "output file: $outfile"
&& Rscript '$configscript'
    ]]>
    </command>
    <configfiles>
        <configfile name="configscript"><![CDATA[
## 1. Set Sample Inputs
## ------------------
## Create a dataframe of sample ids, filetypes, and filenames
## from the input collection. At this point, the list could be
## of mixed type (vcf and tabular), though maybe Galaxy
## restricts that.
## NOTE(review): identifiers are pasted verbatim into single-quoted R
## strings; a name containing a single quote would break the generated
## script. Confirm whether escaping is needed.
samples = list(ids = c(), exts= c(), files = c())
#for $file in $sinputs:
samples\$ids = c(samples\$ids, '${file.element_identifier}')
samples\$exts = c(samples\$exts, '${file.extension}')
samples\$files = c(samples\$files, '${file}')
#end for
samples = data.frame(samples, stringsAsFactors=FALSE)

## 2. Input Conversion (external script)
## ----------------------------------
## We source the input conversion script *after* the samples
## have been populated, so that it performs an inplace replacement
## of the vcf inputs with their converted tabular counterparts.
##
## All samples are tabular after this point
source('$__tool_directory__/helperFunctions.R')
source('$__tool_directory__/snpEffExtract.R')

## 3. Galaxy Params
## --------------
## Set the general script parameters from the UI
variant_frequency <- as.numeric( '$varfreq' )
brewer_color_gene_annotation <- as.character( '$advanced.color' )

## Without clustering the method and cluster count are still assigned,
## using the fixed fallback values below.
#if str($clustering.do) == "TRUE":
pheat_clustering <- TRUE
pheat_clustering_method <- as.character( '$clustering.method' )
pheat_number_of_clusters <- as.integer( '$clustering.nclust' )
#else
pheat_clustering <- FALSE
pheat_clustering_method <- "ward.D2"
pheat_number_of_clusters <- 5
#end if

ratio = as.numeric('$advanced.ratio')
out_ext = '$advanced.output_type'
out_file = paste0("tmp_output.", out_ext)

## 4. Generate Heatmap (external script)
## ----------------------------------
source('$__tool_directory__/heatmap_for_variants.R')

]]>
        </configfile>
    </configfiles>
    <inputs>
        <!-- Variant lists, one dataset per sample; tabular and VCF are both accepted. -->
        <param name="sinputs" format="tabular,vcf" type="data" multiple="true"
               collection_type="list" label="Variant lists data"
               help="Select at least two datasets (or a dataset collection) with variant lists (see the tool help below for format details). Datasets are expected to represent individual samples and dataset names will be used as sample identifiers."
        />
        <param name="varfreq" type="float" min="0" max="1" value="0.1"
               label="Variant Frequency Threshold"
               help="Only plot variants with an intrasample frequency above this threshold in at least one sample." />
        <section name="advanced" title="Image Properties" expanded="true">
            <!-- The selected value doubles as the file extension of the plot (tmp_output.<value>). -->
            <param name="output_type" type="select" label="Plot output format">
                <option value="pdf" selected="true">PDF</option>
                <option value="png">PNG</option>
                <option value="svg">SVG</option>
                <option value="tiff">TIFF</option>
                <option value="bmp">BMP</option>
                <option value="jpeg">JPEG</option>
            </param>
            <param name="ratio" label="Cell Ratio" type="float"
                   min="0.05" value="0.67" max="20"
                   help="Width:Height ratio of individual heatmap cells" />
            <!-- Passed to the R script as brewer_color_gene_annotation. -->
            <param name="color" type="select" label="Color palette used for the gene annotations">
                <option value="Set1" />
                <option value="Set2" />
                <option value="Set3" selected="true" />
                <option value="Pastel2" />
                <option value="Pastel1" />
                <option value="Paired" />
                <option value="Dark2" />
                <option value="Accent" />
                <option value="YlOrRd" />
                <option value="YlOrBr" />
                <option value="YlGnBu" />
                <option value="YlGn" />
                <option value="Reds" />
                <option value="RdPu" />
                <option value="Purples" />
                <option value="PuRd" />
                <option value="PuBuGn" />
                <option value="PuBu" />
                <option value="OrRd" />
                <option value="Oranges" />
                <option value="Greys" />
                <option value="Greens" />
                <option value="GnBu" />
                <option value="BuPu" />
                <option value="BuGn" />
                <option value="Blues" />
                <option value="Spectral" />
                <option value="RdYlGn" />
                <option value="RdYlBu" />
                <option value="RdGy" />
                <option value="RdBu" />
                <option value="PuOr" />
                <option value="PRGn" />
                <option value="PiYG" />
                <option value="BrBG" />
            </param>
        </section>
        <!-- The option values TRUE/FALSE are compared as strings in the config script. -->
        <conditional name="clustering">
            <param name="do" type="select" label="Perform Clustering?">
                <option value="TRUE">Yes</option>
                <option value="FALSE" selected="true">No</option>
            </param>
            <when value="TRUE">
                <param name="nclust" type="integer"
                       min="1" value="1" label="Number of clusters" />
                <param name="method" type="select" label="Clustering method">
                    <option value="ward.D" />
                    <option value="ward.D2" selected="true" />
                    <option value="single" />
                    <option value="complete" />
                    <option value="average">average (UPGMA)</option>
                    <option value="mcquitty">mcquitty (WPGMA)</option>
                    <option value="median">median (WPGMC)</option>
                    <option value="centroid">centroid (UPGMC)</option>
                </param>
            </when>
            <when value="FALSE" />
        </conditional>
    </inputs>
    <outputs>
        <!-- Declared as pdf by default; change_format switches the datatype to
             follow the selected plot format (note jpeg maps to datatype jpg). -->
        <data name="outfile" format="pdf" from_work_dir="tmp_output.*"
              label="Variant-Frequency Plot on ${on_string}: ${advanced.output_type}">
            <change_format>
                <when input="advanced.output_type" value="svg" format="svg" />
                <when input="advanced.output_type" value="png" format="png" />
                <when input="advanced.output_type" value="tiff" format="tiff" />
                <when input="advanced.output_type" value="bmp" format="bmp" />
                <when input="advanced.output_type" value="jpeg" format="jpg" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <!-- PDF, tabular inputs -->
            <param name="sinputs" ftype="tabular" value="input436.tabular,input437.tabular,input438.tabular,input439.tabular,input440.tabular,input441.tabular,input442.tabular,input443.tabular,input444.tabular" />
            <output name="outfile" ftype="pdf" value="heatmap.default.pdf" compare="sim_size" delta="250" />
        </test>
        <test expect_num_outputs="1">
            <!-- PNG, multiple inputs, non-numeric IDs -->
            <param name="sinputs" ftype="tabular" value="input436.tabular,input437.tabular,input443.tabular,input444.tabular" />
            <param name="varfreq" value="0.5" />
            <section name="advanced">
                <param name="color" value="Spectral" />
                <param name="output_type" value="png" />
            </section>
            <output name="outfile" ftype="png" value="heatmap.imageopts.png" compare="sim_size" delta="86000" />
        </test>
        <test expect_num_outputs="1">
            <!-- SVG, clustering defaults -->
            <param name="sinputs" ftype="tabular" value="input438.tabular,input439.tabular,input440.tabular,input441.tabular,input442.tabular" />
            <conditional name="clustering">
                <param name="do" value="TRUE" />
            </conditional>
            <section name="advanced">
                <param name="color" value="Greys" />
                <param name="ratio" value="0.8" />
                <param name="output_type" value="svg" />
            </section>
            <output name="outfile" ftype="svg">
                <assert_contents>
                    <!-- The double quotes that belong to the searched text are written
                         as entities so they do not terminate the attribute value. -->
                    <has_text text="viewBox=&quot;0 0 1156 335&quot;" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- JPEG, clustering extras, mixed alphanumeric labels -->
            <param name="sinputs" ftype="tabular" value="input436.tabular,input443.tabular,input438.tabular,input444.tabular" />
            <conditional name="clustering">
                <param name="do" value="TRUE" />
                <param name="nclust" value="2" />
                <param name="method" value="centroid" />
            </conditional>
            <section name="advanced">
                <param name="color" value="Purples" />
                <param name="ratio" value="1.2" />
                <param name="output_type" value="jpeg" />
            </section>
            <output name="outfile" ftype="jpg" value="heatmap.clustering2.jpeg" compare="sim_size" delta="121000" />
        </test>
        <test expect_num_outputs="1">
            <!-- PDF, vcf test -->
            <param name="sinputs" ftype="vcf" value="snpeff.123.vcf,snpeff.456.vcf,snpeff.789.vcf" />
            <section name="advanced">
                <param name="color" value="PuBuGn" />
                <param name="output_type" value="pdf" />
            </section>
            <output name="outfile" ftype="pdf" value="heatmap.from_vcf.pdf" compare="sim_size" delta="250" />
        </test>
        <test expect_num_outputs="1">
            <!-- SVG, problematic vcf test -->
            <param name="sinputs" ftype="vcf" value="1084592.vcf,1085080.vcf,1085445.vcf,1085841.vcf,1085990.vcf" />
            <section name="advanced">
                <param name="output_type" value="svg" />
            </section>
            <output name="outfile" ftype="svg">
                <assert_contents>
                    <has_text text="viewBox=&quot;0 0 754 271&quot;" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- SVG, Vcf test with problematic splice+syn at snpeff789.vcf for threshold = 0.0222 -->
            <param name="sinputs" ftype="vcf" value="snpeff.123.vcf,snpeff.456.vcf,snpeff.789.vcf" />
            <param name="varfreq" value="0.0222" />
            <section name="advanced">
                <param name="output_type" value="svg" />
            </section>
            <output name="outfile" ftype="svg">
                <assert_contents>
                    <has_text text="viewBox=&quot;0 0 3101 697&quot;" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool generates multi-sample variant-frequency plots from SnpEff-annotated
viral variant lists with optional hierarchical clustering of the samples.

.. class:: Warning mark

  Currently, this tool has been tested only on SARS-CoV-2 variant data.
  While the intention is to have it work for viral variant data in general,
  be prepared for unexpected behavior with other input data at the current
  development stage.

----

The tool expects input variant lists in one of the following two formats:

1. VCF datasets as produced by standard variant callers with

   - variant allele frequencies encoded in an ``AF`` INFO field
   - variant functional genomic effects annotated using SnpEff's EFF format (SnpEff's ANN format is not currently supported!)

2. tabular datasets with columns listing, at least, the following variant properties:

   - ``CHROM``
   - ``POS``
   - ``REF``
   - ``ALT``
   - ``AF``
   - ``EFF[*].AA``
   - ``EFF[*].GENE``
   - ``EFF[*].EFFECT``

   Such files can be produced with SnpSift Extract Fields and can be useful if
   preprocessing of the lists with standard text processing tools is required.

----

Example output:

.. image:: /static/images/example_output.png

    ]]></help>
    <citations>
        <citation type="bibtex">@unpublished{Fuchs2020,
            author = {Fuchs, Jonas},
            title = {},
            year = {2020},
            note = {Multi-sample annotated viral variant-frequency plots based on the R pheatmap package.},
            address = {Institute for Virology, University of Freiburg}
        }</citation>
    </citations>
</tool>