Repository 'snpfreqplot'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/snpfreqplot

Changeset 1:e362b3143cde (2020-12-10)
Previous changeset 0:1062d6ad6503 (2020-12-02) Next changeset 2:dc51db22310c (2020-12-18)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
modified:
heatmap_for_variants.R
helperFunctions.R
snpfreqplot.xml
test-data/heatmap.clustering2.jpeg
test-data/heatmap.default.pdf
test-data/heatmap.from_vcf.pdf
test-data/heatmap.imageopts.png
test-data/input436.tabular
b
diff -r 1062d6ad6503 -r e362b3143cde heatmap_for_variants.R
--- a/heatmap_for_variants.R Wed Dec 02 21:23:06 2020 +0000
+++ b/heatmap_for_variants.R Thu Dec 10 13:41:29 2020 +0000
[
@@ -18,8 +18,8 @@
 extractall_data <- function(id) {
     variants <- variant_files[[id]]
     tmp <- variants %>%
-        mutate(posalt = uni_select) %>%
-        select(posalt, AF)
+        mutate(unique_selectors = group_select) %>%
+        select(unique_selectors, AF)
     colnames(tmp) <- c("Mutation", id)
     return(tmp)
 }
@@ -27,9 +27,12 @@
 extractall_annots <- function(id) {
     variants <- variant_files[[id]]
     tmp <- variants %>%
-        mutate(posalt = uni_select,
+        mutate(unique_selectors = group_select,
                effect = EFF....EFFECT, gene = EFF....GENE) %>%
-        select(posalt, effect, gene)
+        select(unique_selectors, effect, gene)
+    # allow "." as an alternative missing value in EFF.EFFECT and EFF.GENE
+    tmp$effect <- sub("^\\.$", "", tmp$effect)
+    tmp$gene <- sub("^\\.$", "", tmp$gene)
     return(tmp)
 }
 
@@ -53,10 +56,11 @@
 ann_final <- processed_annots %>%
     reduce(function(x, y) {
         unique(rbind(x, y))}) %>%
-    filter(posalt %in% colnames(final))         ## apply frequency filter
+    ## apply frequency filter
+    filter(unique_selectors %in% colnames(final))
 ann_final <- as_tibble(ann_final[str_order(
-    ann_final$posalt, numeric = T), ]) %>%
-    column_to_rownames("posalt")                       ## sort
+    ann_final$unique_selectors, numeric = T), ]) %>%
+    column_to_rownames("unique_selectors")  ## sort
 
                                         # rename annotations
 trans <- function(x, mapping, replace_missing=NULL) {
@@ -146,6 +150,41 @@
                  pheat_number_of_clusters))
 }
 
+
+                                        # Fix Labels
+## Prettify names, check for label parity between final and ann_final
+fix_label <- function(name) {
+    ##' Reduce: 424 AGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTT A
+    ##'     to: 424 AGT… > A
+    cols <- unlist(str_split(name, " "))
+    ## first 3 are POS REF ALT, and the rest are optional differences
+    pos_ref_alt <- cols[1:3]
+    rest <- ""
+    if (length(cols) > 3) {
+        rest <- paste0(" :: ", paste(cols[4:length(cols)], sep = " "))
+    }
+    ## Trim the REF or ALT if too long
+    if (str_length(pos_ref_alt[2]) > 3) {
+        pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, 3), "…")
+    }
+    if (str_length(pos_ref_alt[3]) > 3) {
+        pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, 3), "…")
+    }
+    ## Join required
+    new_name <- paste0(pos_ref_alt[1], " ",
+                       pos_ref_alt[2], " > ",
+                       pos_ref_alt[3])
+    ## Join rest
+    new_name <- paste0(new_name, " ", paste(rest))
+}
+
+colnames(final) <- sapply(colnames(final), fix_label)
+rownames(ann_final) <- sapply(rownames(ann_final), fix_label)
+## sanity test
+stopifnot(all(colnames(final) %in% rownames(ann_final)))
+
+
+                                        # Perform Plotting
 get_plot_dims <- function(heat_map) {
     ## get the dimensions of a pheatmap object
     ## useful for plot formats that can't be written to a file directly, but
b
diff -r 1062d6ad6503 -r e362b3143cde helperFunctions.R
--- a/helperFunctions.R Wed Dec 02 21:23:06 2020 +0000
+++ b/helperFunctions.R Thu Dec 10 13:41:29 2020 +0000
[
@@ -38,8 +38,8 @@
             }
         }
     }
-    uni_select <- c("POS", "ALT", diff.colnames)
-    return(lines[, uni_select] %>% unite(uni_select, sep = " ")) # nolint
+    group_select <- c("POS", "REF", "ALT", diff.colnames)
+    return(lines[, group_select] %>% unite(group_select, sep = " ")) # nolint
 }
 
 split_table_and_process <- function(tab) {
@@ -51,21 +51,21 @@
     #'
     #' This function is necessary because tidyr is difficult
     #' to write custom group binding functions.
-    posalts <- tab %>% group_by(POS, ALT) %>% select(POS, ALT) # nolint
+    group_ind <- tab %>% group_by(POS, REF, ALT) %>% select(POS, REF, ALT) # nolint
     nlines <- nrow(tab)
     groups <- list()
     groups[[1]] <- c(1, 1)
-    last_pa <- paste(posalts[1, ])
+    last_pa <- paste(group_ind[1, ])
     for (r in 2:nlines) {
-        curr_pa <- paste(posalts[r, ])
-        posalt_diff_between_lines <- !all(last_pa == curr_pa)
-        if (posalt_diff_between_lines) {
+        curr_pa <- paste(group_ind[r, ])
+        group_ind_diff_between_lines <- !all(last_pa == curr_pa)
+        if (group_ind_diff_between_lines) {
             ## end of current group, start of new
             groups[[length(groups)]][2] <- r - 1     ## change prev end
             groups[[length(groups) + 1]] <- c(r, r)  ## set (start, end)
         } else if (r == nlines) {
             ## i.e. if the very last line shares
-            ## the same POS ALT as the one before,
+            ## the same POS REF ALT as the one before,
             ## close current group.
             groups[[length(groups)]][2] <- r
         }
b
diff -r 1062d6ad6503 -r e362b3143cde snpfreqplot.xml
--- a/snpfreqplot.xml Wed Dec 02 21:23:06 2020 +0000
+++ b/snpfreqplot.xml Thu Dec 10 13:41:29 2020 +0000
b
@@ -3,14 +3,13 @@
     <description>Generates a heatmap of allele frequencies grouped by variant type for SnpEff-annotated SARS-CoV-2 data</description>
     <macros>
         <token name="@VERSION@">1.0</token>
-        <token name="@GALAXY_VERSION@">0</token>
+        <token name="@GALAXY_VERSION@">1</token>
     </macros>
     <requirements>
         <requirement type="package" version="4.0">r-base</requirement>
         <requirement type="package" version="1.0.12">r-pheatmap</requirement>
         <requirement type="package" version="1.3.0">r-tidyverse</requirement>
         <requirement type="package" version="1.36.0">bioconductor-variantannotation</requirement>
-        <requirement type="package" version="">xorg-libxt</requirement>
     </requirements>
     <edam_topics>
         <edam_topic>topic_0797</edam_topic>
@@ -187,7 +186,7 @@
                 <param name="color" value="Spectral" />
                 <param name="output_type" value="png" />
             </section>
-            <output name="outfile" ftype="png" value="heatmap.imageopts.png" compare="sim_size" delta="86000" />
+            <output name="outfile" ftype="png" value="heatmap.imageopts.png" compare="sim_size" delta="100000" />
         </test>
         <test expect_num_outputs="1">
             <!-- SVG, clustering defaults -->
@@ -202,7 +201,7 @@
             </section>
             <output name="outfile" ftype="svg">
                 <assert_contents>
-                    <has_text text="viewBox=&quot;0 0 1156 335&quot;" />
+                    <has_text text="viewBox=&quot;0 0 1156 361&quot;" />
                 </assert_contents>
             </output>
         </test>
@@ -219,7 +218,7 @@
                 <param name="ratio" value="1.2" />
                 <param name="output_type" value="jpeg" />
             </section>
-            <output name="outfile" ftype="jpg" value="heatmap.clustering2.jpeg" compare="sim_size" delta="121000" />
+            <output name="outfile" ftype="jpg" value="heatmap.clustering2.jpeg" compare="sim_size" delta="130000" />
         </test>
         <test expect_num_outputs="1">
             <!-- PDF, vcf test -->
@@ -238,7 +237,7 @@
             </section>
             <output name="outfile" ftype="svg">
                 <assert_contents>
-                    <has_text text="viewBox=&quot;0 0 754 271&quot;" />
+                    <has_text text="viewBox=&quot;0 0 754 292&quot;" />
                 </assert_contents>
             </output>
         </test>
@@ -251,7 +250,7 @@
             </section>
             <output name="outfile" ftype="svg">
                 <assert_contents>
-                    <has_text text="viewBox=&quot;0 0 3101 697&quot;" />
+                    <has_text text="viewBox=&quot;0 0 3101 879&quot;" />
                 </assert_contents>
             </output>
         </test>
@@ -292,6 +291,11 @@
    Such files can be produced with SnpSift Extract Fields and can be useful if
    preprocessing of the lists with standard text processing tools is required.
 
+   .. class:: infomark
+
+   To represent empty EFF fields in the tabular format you can choose between
+   ``.`` and the empty string.
+
 ----
 
 Example output:
b
diff -r 1062d6ad6503 -r e362b3143cde test-data/heatmap.clustering2.jpeg
b
Binary file test-data/heatmap.clustering2.jpeg has changed
b
diff -r 1062d6ad6503 -r e362b3143cde test-data/heatmap.default.pdf
b
Binary file test-data/heatmap.default.pdf has changed
b
diff -r 1062d6ad6503 -r e362b3143cde test-data/heatmap.from_vcf.pdf
b
Binary file test-data/heatmap.from_vcf.pdf has changed
b
diff -r 1062d6ad6503 -r e362b3143cde test-data/heatmap.imageopts.png
b
Binary file test-data/heatmap.imageopts.png has changed
b
diff -r 1062d6ad6503 -r e362b3143cde test-data/input436.tabular
--- a/test-data/input436.tabular Wed Dec 02 21:23:06 2020 +0000
+++ b/test-data/input436.tabular Thu Dec 10 13:41:29 2020 +0000
[
@@ -1,5 +1,5 @@
 CHROM POS REF ALT AF EFF[*].AA EFF[*].GENE EFF[*].EFFECT
-NC_045512 241 C T 0.992195
+NC_045512 241 C T 0.992195 . . .
 NC_045512 685 AAAGTCATTT A 0.363753 KSF141 ORF1ab CODON_DELETION
 NC_045512 1059 C T 0.988211 T265I ORF1ab NON_SYNONYMOUS_CODING
 NC_045512 3037 C T 0.988485 F924 ORF1ab SYNONYMOUS_CODING