changeset 1:e362b3143cde draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
author iuc
date Thu, 10 Dec 2020 13:41:29 +0000
parents 1062d6ad6503
children dc51db22310c
files heatmap_for_variants.R helperFunctions.R snpfreqplot.xml test-data/heatmap.clustering2.jpeg test-data/heatmap.default.pdf test-data/heatmap.from_vcf.pdf test-data/heatmap.imageopts.png test-data/input436.tabular
diffstat 8 files changed, 66 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/heatmap_for_variants.R	Wed Dec 02 21:23:06 2020 +0000
+++ b/heatmap_for_variants.R	Thu Dec 10 13:41:29 2020 +0000
@@ -18,8 +18,8 @@
 extractall_data <- function(id) {
     variants <- variant_files[[id]]
     tmp <- variants %>%
-        mutate(posalt = uni_select) %>%
-        select(posalt, AF)
+        mutate(unique_selectors = group_select) %>%
+        select(unique_selectors, AF)
     colnames(tmp) <- c("Mutation", id)
     return(tmp)
 }
@@ -27,9 +27,12 @@
 extractall_annots <- function(id) {
     variants <- variant_files[[id]]
     tmp <- variants %>%
-        mutate(posalt = uni_select,
+        mutate(unique_selectors = group_select,
                effect = EFF....EFFECT, gene = EFF....GENE) %>%
-        select(posalt, effect, gene)
+        select(unique_selectors, effect, gene)
+    # allow "." as an alternative missing value in EFF.EFFECT and EFF.GENE
+    tmp$effect <- sub("^\\.$", "", tmp$effect)
+    tmp$gene <- sub("^\\.$", "", tmp$gene)
     return(tmp)
 }
 
@@ -53,10 +56,11 @@
 ann_final <- processed_annots %>%
     reduce(function(x, y) {
         unique(rbind(x, y))}) %>%
-    filter(posalt %in% colnames(final))         ## apply frequency filter
+    ## apply frequency filter
+    filter(unique_selectors %in% colnames(final))
 ann_final <- as_tibble(ann_final[str_order(
-    ann_final$posalt, numeric = T), ]) %>%
-    column_to_rownames("posalt")                       ## sort
+    ann_final$unique_selectors, numeric = TRUE), ]) %>%
+    column_to_rownames("unique_selectors")  ## sort rows, digits compared numerically
 
                                         # rename annotations
 trans <- function(x, mapping, replace_missing=NULL) {
@@ -146,6 +150,41 @@
                  pheat_number_of_clusters))
 }
 
+
+                                        # Fix Labels
+## Prettify names, check for label parity between final and ann_final
+fix_label <- function(name) {
+    ##' Shorten a "POS REF ALT [extra ...]" selector into a plot label,
+    ##' e.g. "424 AGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTT A" -> "424 AGT… > A ".
+    ##' @param name one string of space-separated selector fields
+    ##' @return one label string (always a single element)
+    cols <- unlist(str_split(name, " "))
+    ## first 3 are POS REF ALT, and the rest are optional differences
+    pos_ref_alt <- cols[1:3]
+    ## Trim the REF or ALT to 3 characters plus "…" if too long
+    for (i in 2:3) {
+        if (str_length(pos_ref_alt[i]) > 3) {
+            pos_ref_alt[i] <- paste0(substring(pos_ref_alt[i], 1, 3), "…")
+        }
+    }
+    rest <- ""
+    if (length(cols) > 3) {
+        ## collapse (not sep) so several extra fields become ONE string;
+        ## with sep, paste() yields one element per field
+        rest <- paste0(" :: ", paste(cols[4:length(cols)], collapse = " "))
+    }
+    ## the joining space is always emitted, even when rest is empty
+    paste0(pos_ref_alt[1], " ", pos_ref_alt[2], " > ", pos_ref_alt[3],
+           " ", rest)
+}
+
+colnames(final) <- sapply(colnames(final), fix_label)
+rownames(ann_final) <- sapply(rownames(ann_final), fix_label)
+## sanity test: each column of final needs a matching row in ann_final
+stopifnot(all(colnames(final) %in% rownames(ann_final)))
+
+
+                                        # Perform Plotting
 get_plot_dims <- function(heat_map) {
     ## get the dimensions of a pheatmap object
     ## useful for plot formats that can't be written to a file directly, but
--- a/helperFunctions.R	Wed Dec 02 21:23:06 2020 +0000
+++ b/helperFunctions.R	Thu Dec 10 13:41:29 2020 +0000
@@ -38,8 +38,8 @@
             }
         }
     }
-    uni_select <- c("POS", "ALT", diff.colnames)
-    return(lines[, uni_select] %>% unite(uni_select, sep = " ")) # nolint
+    group_select <- c("POS", "REF", "ALT", diff.colnames)
+    return(lines[, group_select] %>% unite(group_select, sep = " ")) # nolint
 }
 
 split_table_and_process <- function(tab) {
@@ -51,21 +51,21 @@
     #'
     #' This function is necessary because tidyr is difficult
     #' to write custom group binding functions.
-    posalts <- tab %>% group_by(POS, ALT) %>% select(POS, ALT) # nolint
+    group_ind <- tab %>% group_by(POS, REF, ALT) %>% select(POS, REF, ALT) # nolint
     nlines <- nrow(tab)
     groups <- list()
     groups[[1]] <- c(1, 1)
-    last_pa <- paste(posalts[1, ])
+    last_pa <- paste(group_ind[1, ])
     for (r in 2:nlines) {
-        curr_pa <- paste(posalts[r, ])
-        posalt_diff_between_lines <- !all(last_pa == curr_pa)
-        if (posalt_diff_between_lines) {
+        curr_pa <- paste(group_ind[r, ])
+        group_ind_diff_between_lines <- !all(last_pa == curr_pa)
+        if (group_ind_diff_between_lines) {
             ## end of current group, start of new
             groups[[length(groups)]][2] <- r - 1     ## change prev end
             groups[[length(groups) + 1]] <- c(r, r)  ## set (start, end)
         } else if (r == nlines) {
             ## i.e. if the very last line shares
-            ## the same POS ALT as the one before,
+            ## the same POS REF ALT as the one before,
             ## close current group.
             groups[[length(groups)]][2] <- r
         }
--- a/snpfreqplot.xml	Wed Dec 02 21:23:06 2020 +0000
+++ b/snpfreqplot.xml	Thu Dec 10 13:41:29 2020 +0000
@@ -3,14 +3,13 @@
     <description>Generates a heatmap of allele frequencies grouped by variant type for SnpEff-annotated SARS-CoV-2 data</description>
     <macros>
         <token name="@VERSION@">1.0</token>
-        <token name="@GALAXY_VERSION@">0</token>
+        <token name="@GALAXY_VERSION@">1</token>
     </macros>
     <requirements>
         <requirement type="package" version="4.0">r-base</requirement>
         <requirement type="package" version="1.0.12">r-pheatmap</requirement>
         <requirement type="package" version="1.3.0">r-tidyverse</requirement>
         <requirement type="package" version="1.36.0">bioconductor-variantannotation</requirement>
-        <requirement type="package" version="">xorg-libxt</requirement>
     </requirements>
     <edam_topics>
         <edam_topic>topic_0797</edam_topic>
@@ -187,7 +186,7 @@
                 <param name="color" value="Spectral" />
                 <param name="output_type" value="png" />
             </section>
-            <output name="outfile" ftype="png" value="heatmap.imageopts.png" compare="sim_size" delta="86000" />
+            <output name="outfile" ftype="png" value="heatmap.imageopts.png" compare="sim_size" delta="100000" />
         </test>
         <test expect_num_outputs="1">
             <!-- SVG, clustering defaults -->
@@ -202,7 +201,7 @@
             </section>
             <output name="outfile" ftype="svg">
                 <assert_contents>
-                    <has_text text="viewBox=&quot;0 0 1156 335&quot;" />
+                    <has_text text="viewBox=&quot;0 0 1156 361&quot;" />
                 </assert_contents>
             </output>
         </test>
@@ -219,7 +218,7 @@
                 <param name="ratio" value="1.2" />
                 <param name="output_type" value="jpeg" />
             </section>
-            <output name="outfile" ftype="jpg" value="heatmap.clustering2.jpeg" compare="sim_size" delta="121000" />
+            <output name="outfile" ftype="jpg" value="heatmap.clustering2.jpeg" compare="sim_size" delta="130000" />
         </test>
         <test expect_num_outputs="1">
             <!-- PDF, vcf test -->
@@ -238,7 +237,7 @@
             </section>
             <output name="outfile" ftype="svg">
                 <assert_contents>
-                    <has_text text="viewBox=&quot;0 0 754 271&quot;" />
+                    <has_text text="viewBox=&quot;0 0 754 292&quot;" />
                 </assert_contents>
             </output>
         </test>
@@ -251,7 +250,7 @@
             </section>
             <output name="outfile" ftype="svg">
                 <assert_contents>
-                    <has_text text="viewBox=&quot;0 0 3101 697&quot;" />
+                    <has_text text="viewBox=&quot;0 0 3101 879&quot;" />
                 </assert_contents>
             </output>
         </test>
@@ -292,6 +291,11 @@
    Such files can be produced with SnpSift Extract Fields and can be useful if
    preprocessing of the lists with standard text processing tools is required.
 
+   .. class:: infomark
+
+   To represent empty EFF fields in the tabular format you can choose between
+   ``.`` and the empty string.
+
 ----
 
 Example output:
Binary file test-data/heatmap.clustering2.jpeg has changed
Binary file test-data/heatmap.default.pdf has changed
Binary file test-data/heatmap.from_vcf.pdf has changed
Binary file test-data/heatmap.imageopts.png has changed
--- a/test-data/input436.tabular	Wed Dec 02 21:23:06 2020 +0000
+++ b/test-data/input436.tabular	Thu Dec 10 13:41:29 2020 +0000
@@ -1,5 +1,5 @@
 CHROM	POS	REF	ALT	AF	EFF[*].AA	EFF[*].GENE	EFF[*].EFFECT
-NC_045512	241	C	T	0.992195			
+NC_045512	241	C	T	0.992195	.	.	.
 NC_045512	685	AAAGTCATTT	A	0.363753	KSF141	ORF1ab	CODON_DELETION
 NC_045512	1059	C	T	0.988211	T265I	ORF1ab	NON_SYNONYMOUS_CODING
 NC_045512	3037	C	T	0.988485	F924	ORF1ab	SYNONYMOUS_CODING