Mercurial > repos > iuc > snpfreqplot
comparison heatmap_for_variants.R @ 3:3d0adeee3f2b draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit c062eb1cd00ce9d565f3e2f3b042b3dd90d78ce4"
author | iuc |
---|---|
date | Wed, 06 Jan 2021 10:55:53 +0000 |
parents | dc51db22310c |
children |
comparison
equal
deleted
inserted
replaced
2:dc51db22310c | 3:3d0adeee3f2b |
---|---|
151 } | 151 } |
152 | 152 |
153 | 153 |
154 # Fix Labels | 154 # Fix Labels |
155 ## Prettify names, check for label parity between final and ann_final | 155 ## Prettify names, check for label parity between final and ann_final |
156 fix_label <- function(name) { | 156 fix_label <- function(name, min_bases) { |
157 ##' Reduce: 424 AGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTT A | |
158 ##' to: 424 AGT… > A | |
159 cols <- unlist(str_split(name, " ")) | 157 cols <- unlist(str_split(name, " ")) |
160 ## first 3 are POS REF ALT, and the rest are optional differences | 158 ## first 3 are POS REF ALT, and the rest are optional differences |
161 pos_ref_alt <- cols[1:3] | 159 pos_ref_alt <- cols[1:3] |
162 rest <- "" | 160 rest <- "" |
163 if (length(cols) > 3) { | 161 if (length(cols) > 3) { |
164 rest <- paste0(" :: ", paste0(cols[4:length(cols)], collapse = " ")) | 162 rest <- paste0(" :: ", paste0(cols[4:length(cols)], collapse = " ")) |
165 } | 163 } |
166 ## Trim the REF or ALT if too long | 164 ## Trim the REF or ALT if too long |
167 if (str_length(pos_ref_alt[2]) > 3) { | 165 if (str_length(pos_ref_alt[2]) > min_bases + 3) { |
168 pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, 3), "…") | 166 pos_ref_alt[2] <- paste0(substring(pos_ref_alt[2], 1, min_bases), "…+", str_length(pos_ref_alt[2]) - min_bases) |
169 } | 167 } |
170 if (str_length(pos_ref_alt[3]) > 3) { | 168 if (str_length(pos_ref_alt[3]) > min_bases + 3) { |
171 pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, 3), "…") | 169 pos_ref_alt[3] <- paste0(substring(pos_ref_alt[3], 1, min_bases), "…+", str_length(pos_ref_alt[3]) - min_bases) |
172 } | 170 } |
173 ## Join required | 171 ## Join required |
174 new_name <- paste0(pos_ref_alt[1], " ", | 172 new_name <- paste0(pos_ref_alt[1], " ", |
175 pos_ref_alt[2], " > ", | 173 pos_ref_alt[2], " > ", |
176 pos_ref_alt[3]) | 174 pos_ref_alt[3]) |
177 ## Join rest | 175 ## Join rest |
178 new_name <- paste0(new_name, " ", rest) | 176 new_name <- paste0(new_name, " ", rest) |
179 } | 177 } |
180 | 178 |
181 colnames(final) <- sapply(colnames(final), fix_label) | 179 fix_labels <- function(names) { |
182 rownames(ann_final) <- sapply(rownames(ann_final), fix_label) | 180 ## Try to reduce representations of variants by truncating REF and ALT |
181 ## alleles. | |
182 ## Retries with less aggressive truncation if previous attempt did not | |
183 ## result in unique representations | |
184 ## For example, the variant representations: | |
185 ## 11074 C CTTTA | |
186 ## 11074 C CTTTAT | |
187 ## 11074 C CTTAGTT | |
188 ## will be turned into: | |
189 ## 11074 C > CTTTA | |
190 ## 11074 C > CTTTAT | |
191 ## 11074 C > CTT…+4 | |
192 | |
193 min_bases <- 3 | |
194 repeat { | |
195 new_names <- sapply(names, fix_label, min_bases = min_bases) | |
196 if (length(unique(new_names)) == length(new_names)) { | |
197 break | |
198 } | |
199 min_bases <- min_bases + 1 | |
200 } | |
201 return(new_names) | |
202 } | |
203 colnames(final) <- fix_labels(colnames(final)) | |
204 rownames(ann_final) <- fix_labels(rownames(ann_final)) | |
183 ## sanity test | 205 ## sanity test |
184 stopifnot(all(colnames(final) %in% rownames(ann_final))) | 206 stopifnot(all(colnames(final) %in% rownames(ann_final))) |
185 | 207 |
186 | 208 |
187 # Perform Plotting | 209 # Perform Plotting |