comparison heatmap_for_variants.R @ 3:3d0adeee3f2b draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit c062eb1cd00ce9d565f3e2f3b042b3dd90d78ce4"
author iuc
date Wed, 06 Jan 2021 10:55:53 +0000
parents dc51db22310c
children
comparison
equal deleted inserted replaced
2:dc51db22310c 3:3d0adeee3f2b
151 } 151 }
152 152
153 153
154 # Fix Labels 154 # Fix Labels
155 ## Prettify names, check for label parity between final and ann_final 155 ## Prettify names, check for label parity between final and ann_final
156 fix_label <- function(name) { 156 fix_label <- function(name, min_bases) {
157 ##' Reduce: 424 AGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTT A
158 ##' to: 424 AGT… > A
159 cols <- unlist(str_split(name, " ")) 157 cols <- unlist(str_split(name, " "))
160 ## first 3 are POS REF ALT, and the rest are optional differences 158 ## first 3 are POS REF ALT, and the rest are optional differences
161 pos_ref_alt <- cols[1:3] 159 pos_ref_alt <- cols[1:3]
162 rest <- "" 160 rest <- ""
163 if (length(cols) > 3) { 161 if (length(cols) > 3) {
164 rest <- paste0(" :: ", paste0(cols[4:length(cols)], collapse = " ")) 162 rest <- paste0(" :: ", paste0(cols[4:length(cols)], collapse = " "))
165 } 163 }
166 ## Trim the REF or ALT if too long 164 ## Trim the REF or ALT if too long
167 if (str_length(pos_ref_alt[2]) > 3) { 165 if (str_length(pos_ref_alt[2]) > min_bases + 3) {
168 pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, 3), "…") 166 pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, min_bases), "…+", str_length(pos_ref_alt[2]) - min_bases)
169 } 167 }
170 if (str_length(pos_ref_alt[3]) > 3) { 168 if (str_length(pos_ref_alt[3]) > min_bases + 3) {
171 pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, 3), "…") 169 pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, min_bases), "…+", str_length(pos_ref_alt[3]) - min_bases)
172 } 170 }
173 ## Join required 171 ## Join required
174 new_name <- paste0(pos_ref_alt[1], " ", 172 new_name <- paste0(pos_ref_alt[1], " ",
175 pos_ref_alt[2], " > ", 173 pos_ref_alt[2], " > ",
176 pos_ref_alt[3]) 174 pos_ref_alt[3])
177 ## Join rest 175 ## Join rest
178 new_name <- paste0(new_name, " ", rest) 176 new_name <- paste0(new_name, " ", rest)
179 } 177 }
180 178
181 colnames(final) <- sapply(colnames(final), fix_label) 179 fix_labels <- function(names) {
182 rownames(ann_final) <- sapply(rownames(ann_final), fix_label) 180 ## Try to reduce representations of variants by truncating REF and ALT
181 ## alleles.
182 ## Retries with less aggressive truncation if previous attempt did not
183 ## result in unique representations
184 ## For example, the variant representations:
185 ## 11074 C CTTTA
186 ## 11074 C CTTTAT
187 ## 11074 C CTTAGTT
188 ## will be turned into:
189 ## 11074 C > CTTTA
190 ## 11074 C > CTTTAT
191 ## 11074 C > CTT…+4
192
193 min_bases <- 3
194 repeat {
195 new_names <- sapply(names, fix_label, min_bases = min_bases)
196 if (length(unique(new_names)) == length(new_names)) {
197 break
198 }
199 min_bases <- min_bases + 1
200 }
201 return(new_names)
202 }
203 colnames(final) <- fix_labels(colnames(final))
204 rownames(ann_final) <- fix_labels(rownames(ann_final))
183 ## sanity test 205 ## sanity test
184 stopifnot(all(colnames(final) %in% rownames(ann_final))) 206 stopifnot(all(colnames(final) %in% rownames(ann_final)))
185 207
186 208
187 # Perform Plotting 209 # Perform Plotting