# HG changeset patch
# User iuc
# Date 1607607689 0
# Node ID e362b3143cde66c51dc3c4e53264def316f741b7
# Parent 1062d6ad65032cf464e0ae733cf3d637adba3057
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit 1bde09fccd1a5412240ebd5c1f34a45ad73cebe2"
diff -r 1062d6ad6503 -r e362b3143cde heatmap_for_variants.R
--- a/heatmap_for_variants.R Wed Dec 02 21:23:06 2020 +0000
+++ b/heatmap_for_variants.R Thu Dec 10 13:41:29 2020 +0000
@@ -18,8 +18,8 @@
extractall_data <- function(id) {
variants <- variant_files[[id]]
tmp <- variants %>%
- mutate(posalt = uni_select) %>%
- select(posalt, AF)
+ mutate(unique_selectors = group_select) %>%
+ select(unique_selectors, AF)
colnames(tmp) <- c("Mutation", id)
return(tmp)
}
@@ -27,9 +27,12 @@
extractall_annots <- function(id) {
variants <- variant_files[[id]]
tmp <- variants %>%
- mutate(posalt = uni_select,
+ mutate(unique_selectors = group_select,
effect = EFF....EFFECT, gene = EFF....GENE) %>%
- select(posalt, effect, gene)
+ select(unique_selectors, effect, gene)
+ # allow "." as an alternative missing value in EFF.EFFECT and EFF.GENE
+ tmp$effect <- sub("^\\.$", "", tmp$effect)
+ tmp$gene <- sub("^\\.$", "", tmp$gene)
return(tmp)
}
@@ -53,10 +56,11 @@
ann_final <- processed_annots %>%
reduce(function(x, y) {
unique(rbind(x, y))}) %>%
- filter(posalt %in% colnames(final)) ## apply frequency filter
+ ## apply frequency filter
+ filter(unique_selectors %in% colnames(final))
ann_final <- as_tibble(ann_final[str_order(
- ann_final$posalt, numeric = T), ]) %>%
- column_to_rownames("posalt") ## sort
+ ann_final$unique_selectors, numeric = T), ]) %>%
+ column_to_rownames("unique_selectors") ## sort
# rename annotations
trans <- function(x, mapping, replace_missing=NULL) {
@@ -146,6 +150,41 @@
pheat_number_of_clusters))
}
+
+ # Fix Labels
+## Prettify names, check for label parity between final and ann_final
+fix_label <- function(name) {
+ ##' Reduce: 424 AGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTT A
+ ##' to: 424 AGT… > A
+ cols <- unlist(str_split(name, " "))
+ ## first 3 are POS REF ALT, and the rest are optional differences
+ pos_ref_alt <- cols[1:3]
+ rest <- ""
+ if (length(cols) > 3) {
+ rest <- paste0(" :: ", paste(cols[4:length(cols)], sep = " "))
+ }
+ ## Trim the REF or ALT if too long
+ if (str_length(pos_ref_alt[2]) > 3) {
+ pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, 3), "…")
+ }
+ if (str_length(pos_ref_alt[3]) > 3) {
+ pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, 3), "…")
+ }
+ ## Join required
+ new_name <- paste0(pos_ref_alt[1], " ",
+ pos_ref_alt[2], " > ",
+ pos_ref_alt[3])
+ ## Join rest
+ new_name <- paste0(new_name, " ", paste(rest))
+}
+
+colnames(final) <- sapply(colnames(final), fix_label)
+rownames(ann_final) <- sapply(rownames(ann_final), fix_label)
+## sanity test
+stopifnot(all(colnames(final) %in% rownames(ann_final)))
+
+
+ # Perform Plotting
get_plot_dims <- function(heat_map) {
## get the dimensions of a pheatmap object
## useful for plot formats that can't be written to a file directly, but
diff -r 1062d6ad6503 -r e362b3143cde helperFunctions.R
--- a/helperFunctions.R Wed Dec 02 21:23:06 2020 +0000
+++ b/helperFunctions.R Thu Dec 10 13:41:29 2020 +0000
@@ -38,8 +38,8 @@
}
}
}
- uni_select <- c("POS", "ALT", diff.colnames)
- return(lines[, uni_select] %>% unite(uni_select, sep = " ")) # nolint
+ group_select <- c("POS", "REF", "ALT", diff.colnames)
+ return(lines[, group_select] %>% unite(group_select, sep = " ")) # nolint
}
split_table_and_process <- function(tab) {
@@ -51,21 +51,21 @@
#'
#' This function is necessary because tidyr is difficult
#' to write custom group binding functions.
- posalts <- tab %>% group_by(POS, ALT) %>% select(POS, ALT) # nolint
+ group_ind <- tab %>% group_by(POS, REF, ALT) %>% select(POS, REF, ALT) # nolint
nlines <- nrow(tab)
groups <- list()
groups[[1]] <- c(1, 1)
- last_pa <- paste(posalts[1, ])
+ last_pa <- paste(group_ind[1, ])
for (r in 2:nlines) {
- curr_pa <- paste(posalts[r, ])
- posalt_diff_between_lines <- !all(last_pa == curr_pa)
- if (posalt_diff_between_lines) {
+ curr_pa <- paste(group_ind[r, ])
+ group_ind_diff_between_lines <- !all(last_pa == curr_pa)
+ if (group_ind_diff_between_lines) {
## end of current group, start of new
groups[[length(groups)]][2] <- r - 1 ## change prev end
groups[[length(groups) + 1]] <- c(r, r) ## set (start, end)
} else if (r == nlines) {
## i.e. if the very last line shares
- ## the same POS ALT as the one before,
+ ## the same POS REF ALT as the one before,
## close current group.
groups[[length(groups)]][2] <- r
}
diff -r 1062d6ad6503 -r e362b3143cde snpfreqplot.xml
--- a/snpfreqplot.xml Wed Dec 02 21:23:06 2020 +0000
+++ b/snpfreqplot.xml Thu Dec 10 13:41:29 2020 +0000
@@ -3,14 +3,13 @@
Generates a heatmap of allele frequencies grouped by variant type for SnpEff-annotated SARS-CoV-2 data
1.0
- 0
+ 1
r-base
r-pheatmap
r-tidyverse
bioconductor-variantannotation
- xorg-libxt
topic_0797
@@ -187,7 +186,7 @@
-
+
@@ -202,7 +201,7 @@
@@ -219,7 +218,7 @@
-
+
@@ -238,7 +237,7 @@
@@ -251,7 +250,7 @@
@@ -292,6 +291,11 @@
Such files can be produced with SnpSift Extract Fields and can be useful if
preprocessing of the lists with standard text processing tools is required.
+ .. class:: infomark
+
+ To represent empty EFF fields in the tabular format you can choose between
+ ``.`` and the empty string.
+
----
Example output:
diff -r 1062d6ad6503 -r e362b3143cde test-data/heatmap.clustering2.jpeg
Binary file test-data/heatmap.clustering2.jpeg has changed
diff -r 1062d6ad6503 -r e362b3143cde test-data/heatmap.default.pdf
Binary file test-data/heatmap.default.pdf has changed
diff -r 1062d6ad6503 -r e362b3143cde test-data/heatmap.from_vcf.pdf
Binary file test-data/heatmap.from_vcf.pdf has changed
diff -r 1062d6ad6503 -r e362b3143cde test-data/heatmap.imageopts.png
Binary file test-data/heatmap.imageopts.png has changed
diff -r 1062d6ad6503 -r e362b3143cde test-data/input436.tabular
--- a/test-data/input436.tabular Wed Dec 02 21:23:06 2020 +0000
+++ b/test-data/input436.tabular Thu Dec 10 13:41:29 2020 +0000
@@ -1,5 +1,5 @@
CHROM POS REF ALT AF EFF[*].AA EFF[*].GENE EFF[*].EFFECT
-NC_045512 241 C T 0.992195
+NC_045512 241 C T 0.992195 . . .
NC_045512 685 AAAGTCATTT A 0.363753 KSF141 ORF1ab CODON_DELETION
NC_045512 1059 C T 0.988211 T265I ORF1ab NON_SYNONYMOUS_CODING
NC_045512 3037 C T 0.988485 F924 ORF1ab SYNONYMOUS_CODING