# HG changeset patch # User iuc # Date 1609930553 0 # Node ID 3d0adeee3f2b4f8ed9d9d7f67baa84f2a4e5b201 # Parent dc51db22310c7bf74761bd1bb9c49df6fb65e36d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit c062eb1cd00ce9d565f3e2f3b042b3dd90d78ce4" diff -r dc51db22310c -r 3d0adeee3f2b heatmap_for_variants.R --- a/heatmap_for_variants.R Fri Dec 18 23:48:01 2020 +0000 +++ b/heatmap_for_variants.R Wed Jan 06 10:55:53 2021 +0000 @@ -153,9 +153,7 @@ # Fix Labels ## Prettify names, check for label parity between final and ann_final -fix_label <- function(name) { - ##' Reduce: 424 AGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTT A - ##' to: 424 AGT… > A +fix_label <- function(name, min_bases) { cols <- unlist(str_split(name, " ")) ## first 3 are POS REF ALT, and the rest are optional differences pos_ref_alt <- cols[1:3] @@ -164,11 +162,11 @@ rest <- paste0(" :: ", paste0(cols[4:length(cols)], collapse = " ")) } ## Trim the REF or ALT if too long - if (str_length(pos_ref_alt[2]) > 3) { - pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, 3), "…") + if (str_length(pos_ref_alt[2]) > min_bases + 3) { + pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, min_bases), "…+", str_length(pos_ref_alt[2]) - min_bases) } - if (str_length(pos_ref_alt[3]) > 3) { - pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, 3), "…") + if (str_length(pos_ref_alt[3]) > min_bases + 3) { + pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, min_bases), "…+", str_length(pos_ref_alt[3]) - min_bases) } ## Join required new_name <- paste0(pos_ref_alt[1], " ", @@ -178,8 +176,32 @@ new_name <- paste0(new_name, " ", rest) } -colnames(final) <- sapply(colnames(final), fix_label) -rownames(ann_final) <- sapply(rownames(ann_final), fix_label) +fix_labels <- function(names) { + ## Try to reduce representations of variants by truncating REF and ALT + ## alleles. + ## Retries with less aggressive truncation if previous attempt did not + ## result in unique representations + ## For example, the variant representations: + ## 11074 C CTTTA + ## 11074 C CTTTAT + ## 11074 C CTTAGTT + ## will be turned into: + ## 11074 C > CTTTA + ## 11074 C > CTTTAT + ## 11074 C > CTT…+4 + + min_bases <- 3 + repeat { + new_names <- sapply(names, fix_label, min_bases = min_bases) + if (length(unique(new_names)) == length(new_names)) { + break + } + min_bases <- min_bases + 1 + } + return(new_names) +} +colnames(final) <- fix_labels(colnames(final)) +rownames(ann_final) <- fix_labels(rownames(ann_final)) ## sanity test stopifnot(all(colnames(final) %in% rownames(ann_final))) diff -r dc51db22310c -r 3d0adeee3f2b snpfreqplot.xml --- a/snpfreqplot.xml Fri Dec 18 23:48:01 2020 +0000 +++ b/snpfreqplot.xml Wed Jan 06 10:55:53 2021 +0000 @@ -3,7 +3,7 @@ Generates a heatmap of allele frequencies grouped by variant type for SnpEff-annotated SARS-CoV-2 data 1.0 - 2 + 3 r-base @@ -209,7 +209,7 @@ - + @@ -245,7 +245,7 @@ - +