Mercurial > repos > iuc > snpfreqplot
view snpfreqplot.xml @ 0:1062d6ad6503 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1f35303af979c16d9a3126dbc882a59f686ace5d"
author | iuc |
---|---|
date | Wed, 02 Dec 2020 21:23:06 +0000 |
parents | |
children | e362b3143cde |
line wrap: on
line source
<!--
    Galaxy tool wrapper: Variant Frequency Plot (snpfreqplot).

    Renders a heatmap of per-sample allele frequencies from SnpEff-annotated
    variant lists (VCF or tabular). The wrapper generates a small R driver
    script (the "configscript" configfile) that sets the parameters chosen in
    the UI and then sources the three R scripts shipped in the tool directory:
    helperFunctions.R, snpEffExtract.R and heatmap_for_variants.R.
-->
<tool id="snpfreqplot"
      name="Variant Frequency Plot"
      version="@VERSION@+galaxy@GALAXY_VERSION@"
      profile="20.09"
      license="GPL-3.0-or-later">
    <description>Generates a heatmap of allele frequencies grouped by variant type for SnpEff-annotated SARS-CoV-2 data</description>
    <macros>
        <token name="@VERSION@">1.0</token>
        <token name="@GALAXY_VERSION@">0</token>
    </macros>
    <requirements>
        <requirement type="package" version="4.0">r-base</requirement>
        <requirement type="package" version="1.0.12">r-pheatmap</requirement>
        <requirement type="package" version="1.3.0">r-tidyverse</requirement>
        <requirement type="package" version="1.36.0">bioconductor-variantannotation</requirement>
        <!-- NOTE(review): this requirement is unversioned (empty version);
             pin a concrete xorg-libxt version once one has been validated. -->
        <requirement type="package" version="">xorg-libxt</requirement>
    </requirements>
    <edam_topics>
        <edam_topic>topic_0797</edam_topic>
        <edam_topic>topic_0092</edam_topic>
    </edam_topics>
    <edam_operations>
        <edam_operation>operation_3436</edam_operation>
    </edam_operations>
    <!-- The "cat ... > /dev/null" steps read each bundled R script before the
         driver runs, so the job stops early if one of them is unreadable. -->
    <command detect_errors="exit_code"><![CDATA[
        #set $outfile = "tmp_output." + str($advanced.output_type)
        cat '$__tool_directory__/helperFunctions.R' > /dev/null &&
        cat '$__tool_directory__/snpEffExtract.R' > /dev/null &&
        cat '$__tool_directory__/heatmap_for_variants.R' > /dev/null &&
        echo "output file: $outfile" &&
        Rscript '$configscript'
    ]]></command>
    <configfiles>
        <configfile name="configscript"><![CDATA[
## 1. Set Sample Inputs
## ------------------
## Create a dataframe of sample ids, filetypes, and filenames
## from the input collection. At this point, the list could be
## of mixed type (vcf and tabular), though maybe Galaxy
## restricts that.

samples = list(ids = c(), exts= c(), files = c())
#for $i, $file in enumerate($sinputs):
samples\$ids = c(samples\$ids, '${file.element_identifier}')
samples\$exts = c(samples\$exts, '${file.extension}')
samples\$files = c(samples\$files, '${file}')
#end for
samples = data.frame(samples, stringsAsFactors=F)

## 2. Input Conversion (external script)
## ----------------------------------
## We source the input conversion script *after* the samples
## have been populated, so that it performs an inplace replacement
## of the vcf inputs with their converted tabular counterparts.
##
## All samples are all tabular after this point

source('$__tool_directory__/helperFunctions.R')
source('$__tool_directory__/snpEffExtract.R')

## 3. Galaxy Params
## --------------
## Set the general script parameters from the UI

variant_frequency <- as.numeric( '$varfreq' )
brewer_color_gene_annotation <- as.character( '$advanced.color' )
#if str($clustering.do) == "TRUE":
pheat_clustering <- TRUE
pheat_clustering_method <- as.character( '$clustering.method' )
pheat_number_of_clusters <- as.integer( '$clustering.nclust' )
#else
pheat_clustering <- FALSE
pheat_clustering_method <- "ward.D2"
pheat_number_of_clusters <- 5
#end if
ratio = as.numeric('$advanced.ratio')
out_ext = '$advanced.output_type'
out_file = paste0("tmp_output.", out_ext)

## 4. Generate Heatmap (external script)
## ----------------------------------

source('$__tool_directory__/heatmap_for_variants.R')
        ]]></configfile>
    </configfiles>
    <inputs>
        <param name="sinputs"
               format="tabular,vcf"
               type="data"
               multiple="true"
               collection_type="list"
               label="Variant lists data"
               help="Select at least two datasets (or a dataset collection) with variant lists (see the tool help below for format details). Datasets are expected to represent individual samples and dataset names will be used as sample identifiers." />
        <param name="varfreq"
               type="float"
               min="0"
               max="1"
               value="0.1"
               label="Variant Frequency Threshold"
               help="Only plot variants with an intrasample frequency above this threshold in at least one sample." />
        <section name="advanced" title="Image Properties" expanded="true">
            <param name="output_type" type="select" label="Plot output format">
                <option value="pdf" selected="true">PDF</option>
                <option value="png">PNG</option>
                <option value="svg">SVG</option>
                <option value="tiff">TIFF</option>
                <option value="bmp">BMP</option>
                <option value="jpeg">JPEG</option>
            </param>
            <param name="ratio"
                   label="Cell Ratio"
                   type="float"
                   min="0.05"
                   value="0.67"
                   max="20"
                   help="Width:Height ratio of individual heatmap cells" />
            <param name="color" type="select" label="Color palette used for the gene annotations">
                <option value="Set1" />
                <option value="Set2" />
                <option value="Set3" selected="true" />
                <option value="Pastel2" />
                <option value="Pastel1" />
                <option value="Paired" />
                <option value="Dark2" />
                <option value="Accent" />
                <option value="YlOrRd" />
                <option value="YlOrBr" />
                <option value="YlGnBu" />
                <option value="YlGn" />
                <option value="Reds" />
                <option value="RdPu" />
                <option value="Purples" />
                <option value="PuRd" />
                <option value="PuBuGn" />
                <option value="PuBu" />
                <option value="OrRd" />
                <option value="Oranges" />
                <option value="Greys" />
                <option value="Greens" />
                <option value="GnBu" />
                <option value="BuPu" />
                <option value="BuGn" />
                <option value="Blues" />
                <option value="Spectral" />
                <option value="RdYlGn" />
                <option value="RdYlBu" />
                <option value="RdGy" />
                <option value="RdBu" />
                <option value="PuOr" />
                <option value="PRGn" />
                <option value="PiYG" />
                <option value="BrBG" />
            </param>
        </section>
        <conditional name="clustering">
            <param name="do" type="select" label="Perform Clustering?">
                <option value="TRUE">Yes</option>
                <option value="FALSE" selected="true">No</option>
            </param>
            <when value="TRUE">
                <param name="nclust" type="integer" min="1" value="1" label="Number of clusters" />
                <param name="method" type="select" label="Clustering method">
                    <option value="ward.D" />
                    <option value="ward.D2" selected="true" />
                    <option value="single" />
                    <option value="complete" />
                    <option value="average">average (UPGMA)</option>
                    <option value="mcquitty">mcquitty (WPGMA)</option>
                    <option value="median">median (WPGMC)</option>
                    <option value="centroid">centroid (UPGMC)</option>
                </param>
            </when>
            <when value="FALSE" />
        </conditional>
    </inputs>
    <outputs>
        <!-- The datatype follows the selected plot format; pdf is the default. -->
        <data name="outfile"
              format="pdf"
              from_work_dir="tmp_output.*"
              label="Variant-Frequency Plot on ${on_string}: ${advanced.output_type}">
            <change_format>
                <when input="advanced.output_type" value="svg" format="svg" />
                <when input="advanced.output_type" value="png" format="png" />
                <when input="advanced.output_type" value="tiff" format="tiff" />
                <when input="advanced.output_type" value="bmp" format="bmp" />
                <when input="advanced.output_type" value="jpeg" format="jpg" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <!-- PDF, tabular inputs -->
            <param name="sinputs" ftype="tabular" value="input436.tabular,input437.tabular,input438.tabular,input439.tabular,input440.tabular,input441.tabular,input442.tabular,input443.tabular,input444.tabular" />
            <output name="outfile" ftype="pdf" value="heatmap.default.pdf" compare="sim_size" delta="250" />
        </test>
        <test expect_num_outputs="1">
            <!-- PNG, multiple inputs, non-numeric IDS -->
            <param name="sinputs" ftype="tabular" value="input436.tabular,input437.tabular,input443.tabular,input444.tabular" />
            <param name="varfreq" value="0.5" />
            <section name="advanced">
                <param name="color" value="Spectral" />
                <param name="output_type" value="png" />
            </section>
            <output name="outfile" ftype="png" value="heatmap.imageopts.png" compare="sim_size" delta="86000" />
        </test>
        <test expect_num_outputs="1">
            <!-- SVG, clustering defaults -->
            <param name="sinputs" ftype="tabular" value="input438.tabular,input439.tabular,input440.tabular,input441.tabular,input442.tabular" />
            <conditional name="clustering">
                <param name="do" value="TRUE" />
            </conditional>
            <section name="advanced">
                <param name="color" value="Greys" />
                <param name="ratio" value="0.8" />
                <param name="output_type" value="svg" />
            </section>
            <output name="outfile" ftype="svg">
                <assert_contents>
                    <!-- Quotes inside the attribute value are escaped to keep the XML well-formed. -->
                    <has_text text="viewBox=&quot;0 0 1156 335&quot;" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- JPEG, clustering extras, mixed alphanumeric labels -->
            <param name="sinputs" ftype="tabular" value="input436.tabular,input443.tabular,input438.tabular,input444.tabular" />
            <conditional name="clustering">
                <param name="do" value="TRUE" />
                <param name="nclust" value="2" />
                <param name="method" value="centroid" />
            </conditional>
            <section name="advanced">
                <param name="color" value="Purples" />
                <param name="ratio" value="1.2" />
                <param name="output_type" value="jpeg" />
            </section>
            <output name="outfile" ftype="jpg" value="heatmap.clustering2.jpeg" compare="sim_size" delta="121000" />
        </test>
        <test expect_num_outputs="1">
            <!-- PDF, vcf test -->
            <param name="sinputs" ftype="vcf" value="snpeff.123.vcf,snpeff.456.vcf,snpeff.789.vcf" />
            <section name="advanced">
                <param name="color" value="PuBuGn" />
                <param name="output_type" value="pdf" />
            </section>
            <output name="outfile" ftype="pdf" value="heatmap.from_vcf.pdf" compare="sim_size" delta="250" />
        </test>
        <test expect_num_outputs="1">
            <!-- SVG, problematic vcf test -->
            <param name="sinputs" ftype="vcf" value="1084592.vcf,1085080.vcf,1085445.vcf,1085841.vcf,1085990.vcf" />
            <section name="advanced">
                <param name="output_type" value="svg" />
            </section>
            <output name="outfile" ftype="svg">
                <assert_contents>
                    <has_text text="viewBox=&quot;0 0 754 271&quot;" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- SVG, Vcf test with problematic splice+syn at snpeff789.vcf for threshold = 0.0222 -->
            <param name="sinputs" ftype="vcf" value="snpeff.123.vcf,snpeff.456.vcf,snpeff.789.vcf" />
            <param name="varfreq" value="0.0222" />
            <section name="advanced">
                <param name="output_type" value="svg" />
            </section>
            <output name="outfile" ftype="svg">
                <assert_contents>
                    <has_text text="viewBox=&quot;0 0 3101 697&quot;" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool generates multi-sample variant-frequency plots from SnpEff-annotated viral variant lists with optional hierarchical clustering of the samples.

.. class:: warningmark

Currently, this tool has been tested only on SARS-CoV-2 variant data. While the intention is to have it work for viral variant data in general, be prepared for unexpected behavior with other input data at the current development stage.

----

The tool expects input variant lists in one of the following two formats:

1. VCF datasets as produced by standard variant callers with

   - variant allele frequencies encoded in an ``AF`` INFO field

   - variant functional genomic effects annotated using SnpEff's EFF format (SnpEff's ANN format is not currently supported!)

2. tabular datasets with columns listing, at least, the following variant properties:

   - ``CHROM``
   - ``POS``
   - ``REF``
   - ``ALT``
   - ``AF``
   - ``EFF[*].AA``
   - ``EFF[*].GENE``
   - ``EFF[*].EFFECT``

   Such files can be produced with SnpSift Extract Fields and can be useful if preprocessing of the lists with standard text processing tools is required.

----

Example output:

.. image:: /static/images/example_output.png
    ]]></help>
    <citations>
        <citation type="bibtex">@unpublished{Fuchs2020,
    author = {Fuchs, Jonas},
    title = {},
    year = {2020},
    note = {Multi-sample annotated viral variant-frequency plots based on the R pheatmap package.},
    address = {Institute for Virology, University of Freiburg}
}</citation>
    </citations>
</tool>