Mercurial > repos > nml > mykrobe_parser
comparison mykrobe_parser.R @ 2:f2608dccd3e0 draft
planemo upload for repository https://github.com/phac-nml/mykrobe-parser commit 1d77b6cae26ef3456ff6d469d71c61cab6a19906-dirty
author | nml |
---|---|
date | Tue, 23 Oct 2018 08:54:51 -0400 |
parents | 6eae14751768 |
children | 8529045f0fdf |
comparison
equal
deleted
inserted
replaced
1:05ca0dbc9f46 | 2:f2608dccd3e0 |
---|---|
18 # Take the JSON output from Mykrobe, rearrange, output for LIMS | 18 # Take the JSON output from Mykrobe, rearrange, output for LIMS |
19 # Adrian Zetner | 19 # Adrian Zetner |
20 # August 2018 | 20 # August 2018 |
21 | 21 |
22 # Libraries #### | 22 # Libraries #### |
23 library(jsonlite, quietly = T) | 23 |
24 library(here, quietly = T) | 24 sink(stdout(), type = "message") |
25 suppressMessages(library(dplyr, quietly = T)) | 25 |
26 suppressMessages(library(purrr, quietly = T)) | 26 suppressPackageStartupMessages({ |
27 library(tidyr, quietly = T) | 27 library(jsonlite) |
28 library(stringr, quietly = T) | 28 library(here) |
29 library(optparse, quietly = T) | 29 library(dplyr) |
| 30 library(purrr) |
| 31 library(tidyr) |
| 32 library(stringr) |
| 33 library(optparse) |
| 34 }) |
30 | 35 |
31 # Define custom functions, variables, and paths. Collect and use CL arguments #### | 36 # Define custom functions, variables, and paths. Collect and use CL arguments #### |
32 | 37 |
33 # Here's a function to recreate that output table from the input JSON files | 38 # Here's a function to recreate that output table from the input JSON files |
34 | 39 |
95 | 100 |
96 # Take that list and mash all the elements together as columns in a tibble, recycling as needed to fill in space | 101 # Take that list and mash all the elements together as columns in a tibble, recycling as needed to fill in space |
97 # eg. phylo_group is repeated/recycled as many times as there are drugs tested | 102 # eg. phylo_group is repeated/recycled as many times as there are drugs tested |
98 as_tibble(temp) | 103 as_tibble(temp) |
99 } | 104 } |
100 | |
101 sink(stdout(), type = "message") | |
102 | |
103 suppressPackageStartupMessages({ | |
104 library(jsonlite) | |
105 library(here) | |
106 library(dplyr) | |
107 library(purrr) | |
108 library(tidyr) | |
109 library(stringr) | |
110 library(optparse) | |
111 }) | |
112 | 105 |
113 # Get command line arguments with optparse | 106 # Get command line arguments with optparse |
114 option_list = list( | 107 option_list = list( |
115 make_option(c("-f", "--file"), | 108 make_option(c("-f", "--file"), |
116 type="character", | 109 type="character", |
272 if (length(predictions.table) == 1){ | 265 if (length(predictions.table) == 1){ |
273 print(predictions.table) | 266 print(predictions.table) |
274 stop("No susceptibility results in files specified. Did the testing fail?", call.=FALSE) | 267 stop("No susceptibility results in files specified. Did the testing fail?", call.=FALSE) |
275 } | 268 } |
276 | 269 |
277 # Variants | 270 # Variants, if present |
278 # Multiple resistance mutations and confidence per drug in the X_R_mutations column | 271 if (0 < predictions.table %>% |
279 # Actual protein changes in Mykrobe_X columns | 272 select(ends_with("_Prediction")) %>% |
280 | 273 unlist(use.names = F) %>% |
281 variants.temp <- | 274 str_count("[R,r]") %>% |
282 temp %>% | 275 sum()){ |
283 select(file, drug, variants = `variants (gene:alt_depth:wt_depth:conf)`) %>% | 276 |
284 mutate(variants = replace(variants, variants == "", NA)) %>% # Make missing data consistent... | 277 # Multiple resistance mutations and confidence per drug in the X_R_mutations column |
285 filter(!is.na(variants)) %>% # ...Then get rid of it | 278 # Actual protein changes in Mykrobe_X columns |
286 mutate(tempcols = paste(drug, "R_mutations", sep = "_")) %>% | 279 |
287 mutate(R_mutations = variants) %>% | 280 variants.temp <- |
288 mutate(variants = strsplit(variants, "__")) %>% # Split the mutations across rows (list first then split across rows) | 281 temp %>% |
289 unnest(variants) %>% | 282 select(file, drug, variants = `variants (gene:alt_depth:wt_depth:conf)`) %>% |
290 separate(variants, c("gene", "mutation"), "_") %>% | 283 mutate(variants = replace(variants, variants == "", NA)) %>% # Make missing data consistent... |
291 mutate(columnname = ifelse(gene %in% c("tlyA", "rrs", "gid"), # Check for columns that include the drug name or not and paste accordingly | 284 filter(!is.na(variants)) %>% # ...Then get rid of it |
292 paste("Mykrobe", drug, gene, sep = "_"), | 285 mutate(tempcols = paste(drug, "R_mutations", sep = "_")) %>% |
293 paste("Mykrobe", gene, sep = "_"))) %>% | 286 mutate(R_mutations = variants) %>% |
294 # Extract out the mutation information with a regex that covers all potential genes | 287 mutate(variants = strsplit(variants, "__")) %>% # Split the mutations across rows (list first then split across rows) |
295 # This regex looks for whatever is ahead of the first colon and after the last hyphen | 288 unnest(variants) %>% |
296 mutate(mutation = str_match(mutation, "(.*)-.*:")[,2]) %>% | 289 separate(variants, c("gene", "mutation"), "_") %>% |
297 select(file, tempcols, R_mutations, columnname, mutation) | 290 mutate(columnname = ifelse(gene %in% c("tlyA", "rrs", "gid"), # Check for columns that include the drug name or not and paste accordingly |
298 | 291 paste("Mykrobe", drug, gene, sep = "_"), |
299 # Split each kind of variants into its own temp table then merge | 292 paste("Mykrobe", gene, sep = "_"))) %>% |
300 variants.1 <- | 293 # Extract out the mutation information with a regex that covers all potential genes |
301 variants.temp %>% | 294 # This regex looks for whatever is ahead of the first colon and after the last hyphen |
302 select(file, tempcols, R_mutations) %>% | 295 mutate(mutation = str_match(mutation, "(.*)-.*:")[,2]) %>% |
303 distinct() %>% | 296 select(file, tempcols, R_mutations, columnname, mutation) |
304 spread(tempcols, R_mutations) | 297 |
305 | 298 # Split each kind of variants into its own temp table then merge |
306 variants.2 <- | 299 variants.1 <- |
307 variants.temp %>% | 300 variants.temp %>% |
308 select(file, columnname, mutation) %>% | 301 select(file, tempcols, R_mutations) %>% |
309 group_by(file, columnname) %>% | 302 distinct() %>% |
310 summarise(mutation = paste(mutation, collapse = ";")) %>% | 303 spread(tempcols, R_mutations) |
311 spread(columnname, mutation) | 304 |
312 | 305 variants.2 <- |
313 variants.table <- full_join(variants.1, variants.2, by = "file") | 306 variants.temp %>% |
| 307 select(file, columnname, mutation) %>% |
| 308 group_by(file, columnname) %>% |
| 309 summarise(mutation = paste(mutation, collapse = ";")) %>% |
| 310 spread(columnname, mutation) |
| 311 |
| 312 variants.table <- full_join(variants.1, variants.2, by = "file") |
| 313 }else{ |
| 314 variants.table <- data.frame(file=predictions.table$file, stringsAsFactors = F) |
| 315 } |
| 316 |
314 | 317 |
315 # Make a report #### | 318 # Make a report #### |
316 | 319 |
317 report <- | 320 report <- |
318 temp %>% | 321 temp %>% |
367 phylo_group_depth, | 370 phylo_group_depth, |
368 species_depth, | 371 species_depth, |
369 lineage_depth) %>% | 372 lineage_depth) %>% |
370 distinct() %>% | 373 distinct() %>% |
371 write.csv("output-jsondata.csv", row.names = F) | 374 write.csv("output-jsondata.csv", row.names = F) |
372 print("Writing JSON data to CSV as output-jsondata.txt") | 375 print("Writing JSON data to CSV as output-jsondata.csv") |
373 sink(NULL, type="message") # close the sink | 376 sink(NULL, type="message") # close the sink |
374 | 377 |
375 quit() | 378 quit() |