comparison helperFunctions.R @ 2:dc51db22310c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snpfreqplot/ commit d1c54d077cfc0eeb9699719760e668948cb9bbbc"
author iuc
date Fri, 18 Dec 2020 23:48:01 +0000
parents e362b3143cde
children
comparison
equal deleted inserted replaced
1:e362b3143cde 2:dc51db22310c
52 #' This function is necessary because tidyr is difficult 52 #' This function is necessary because tidyr is difficult
53 #' to write custom group binding functions. 53 #' to write custom group binding functions.
54 group_ind <- tab %>% group_by(POS, REF, ALT) %>% select(POS, REF, ALT) # nolint 54 group_ind <- tab %>% group_by(POS, REF, ALT) %>% select(POS, REF, ALT) # nolint
55 nlines <- nrow(tab) 55 nlines <- nrow(tab)
56 groups <- list() 56 groups <- list()
57 groups[[1]] <- c(1, 1) 57 if (nlines) {
58 last_pa <- paste(group_ind[1, ]) 58 groups[[1]] <- c(1, 1)
59 for (r in 2:nlines) { 59 } else {
60 curr_pa <- paste(group_ind[r, ]) 60 groups[[1]] <- c(0, 0)
61 group_ind_diff_between_lines <- !all(last_pa == curr_pa) 61 }
62 if (group_ind_diff_between_lines) { 62 if (nlines >= 2) {
63 ## end of current group, start of new 63 last_pa <- paste(group_ind[1, ])
64 groups[[length(groups)]][2] <- r - 1 ## change prev end 64 for (r in 2:nlines) {
65 groups[[length(groups) + 1]] <- c(r, r) ## set (start, end) 65 curr_pa <- paste(group_ind[r, ])
66 } else if (r == nlines) { 66 group_ind_diff_between_lines <- !all(last_pa == curr_pa)
67 ## i.e. if the very last line shares 67 if (group_ind_diff_between_lines) {
68 ## the same POS REF ALT as the one before, 68 ## end of current group, start of new
69 ## close current group. 69 groups[[length(groups)]][2] <- r - 1 ## change prev end
70 groups[[length(groups)]][2] <- r 70 groups[[length(groups) + 1]] <- c(r, r) ## set (start, end)
71 } else if (r == nlines) {
72 ## i.e. if the very last line shares
73 ## the same POS REF ALT as the one before,
74 ## close current group.
75 groups[[length(groups)]][2] <- r
76 }
77 last_pa <- curr_pa
71 } 78 }
72 last_pa <- curr_pa
73 } 79 }
74 as_tibble(do.call( 80 as_tibble(do.call(
75 "rbind", 81 "rbind",
76 lapply(groups, function(grange) { 82 lapply(groups, function(grange) {
77 expand_range <- grange[1]:grange[2] 83 expand_range <- grange[1]:grange[2]
80 )) 86 ))
81 } 87 }
82 88
83 read_and_process <- function(id) { 89 read_and_process <- function(id) {
84 file <- (samples %>% filter(ids == id))$files # nolint 90 file <- (samples %>% filter(ids == id))$files # nolint
85 variants <- read.table(file, header = T, sep = "\t") 91 variants <- read.table(file, header = T, sep = "\t", colClasses = "character")
92 variants["AF"] <- lapply(variants["AF"], as.numeric)
86 uniq_ids <- split_table_and_process(variants) 93 uniq_ids <- split_table_and_process(variants)
87 if (nrow(variants) != nrow(uniq_ids)) { 94 if (nrow(variants) != nrow(uniq_ids)) {
88 stop(paste0(id, " '", file, "' failed: ", file, "\"", 95 stop(paste0(id, " '", file, "' failed: ", file, "\"",
89 "nrow(variants)=", nrow(variants), 96 "nrow(variants)=", nrow(variants),
90 " but nrow(uniq_ids)=", nrow(uniq_ids))) 97 " but nrow(uniq_ids)=", nrow(uniq_ids)))