# HG changeset patch
# User iuc
# Date 1609930553 0
# Node ID 3d0adeee3f2b4f8ed9d9d7f67baa84f2a4e5b201
# Parent dc51db22310c7bf74761bd1bb9c49df6fb65e36d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit c062eb1cd00ce9d565f3e2f3b042b3dd90d78ce4"
diff -r dc51db22310c -r 3d0adeee3f2b heatmap_for_variants.R
--- a/heatmap_for_variants.R Fri Dec 18 23:48:01 2020 +0000
+++ b/heatmap_for_variants.R Wed Jan 06 10:55:53 2021 +0000
@@ -153,9 +153,7 @@
# Fix Labels
## Prettify names, check for label parity between final and ann_final
-fix_label <- function(name) {
- ##' Reduce: 424 AGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTT A
- ##' to: 424 AGT… > A
+fix_label <- function(name, min_bases) {
cols <- unlist(str_split(name, " "))
## first 3 are POS REF ALT, and the rest are optional differences
pos_ref_alt <- cols[1:3]
@@ -164,11 +162,11 @@
rest <- paste0(" :: ", paste0(cols[4:length(cols)], collapse = " "))
}
## Trim the REF or ALT if too long
- if (str_length(pos_ref_alt[2]) > 3) {
- pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, 3), "…")
+ if (str_length(pos_ref_alt[2]) > min_bases + 3) {
+ pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, min_bases), "…+", str_length(pos_ref_alt[2]) - min_bases)
}
- if (str_length(pos_ref_alt[3]) > 3) {
- pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, 3), "…")
+ if (str_length(pos_ref_alt[3]) > min_bases + 3) {
+ pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, min_bases), "…+", str_length(pos_ref_alt[3]) - min_bases)
}
## Join required
new_name <- paste0(pos_ref_alt[1], " ",
@@ -178,8 +176,32 @@
new_name <- paste0(new_name, " ", rest)
}
-colnames(final) <- sapply(colnames(final), fix_label)
-rownames(ann_final) <- sapply(rownames(ann_final), fix_label)
+fix_labels <- function(names) {
+    ## Shorten variant labels by truncating long REF and ALT alleles.
+    ## Retries with less aggressive truncation while the shortened labels
+    ## are not unique; stops once min_bases exceeds the longest input
+    ## (nothing is truncated then), so duplicated inputs cannot loop forever.
+    ## For example, the variant representations:
+    ##   11074 C CTTTA
+    ##   11074 C CTTTAT
+    ##   11074 C CTTAGTT
+    ## will be turned into:
+    ##   11074 C > CTTTA
+    ##   11074 C > CTTTAT
+    ##   11074 C > CTT…+4
+    max_bases <- max(c(0, nchar(names)), na.rm = TRUE)
+    min_bases <- 3
+    repeat {
+        new_names <- sapply(names, fix_label, min_bases = min_bases)
+        if (anyDuplicated(new_names) == 0 || min_bases > max_bases) {
+            break
+        }
+        min_bases <- min_bases + 1
+    }
+    return(new_names)
+}
+colnames(final) <- fix_labels(colnames(final))
+rownames(ann_final) <- fix_labels(rownames(ann_final))
## sanity test
stopifnot(all(colnames(final) %in% rownames(ann_final)))
diff -r dc51db22310c -r 3d0adeee3f2b snpfreqplot.xml
--- a/snpfreqplot.xml Fri Dec 18 23:48:01 2020 +0000
+++ b/snpfreqplot.xml Wed Jan 06 10:55:53 2021 +0000
@@ -3,7 +3,7 @@
Generates a heatmap of allele frequencies grouped by variant type for SnpEff-annotated SARS-CoV-2 data
1.0
- 2
+ 3
r-base
@@ -209,7 +209,7 @@
@@ -245,7 +245,7 @@