mykrobe_parser: mykrobe_parser.R comparison

comparison mykrobe_parser.R @ 2:f2608dccd3e0 draft

planemo upload for repository https://github.com/phac-nml/mykrobe-parser commit 1d77b6cae26ef3456ff6d469d71c61cab6a19906-dirty

author	nml
date	Tue, 23 Oct 2018 08:54:51 -0400
parents	6eae14751768
children	8529045f0fdf

comparison

equal deleted inserted replaced

-:05ca0dbc9f46
+:f2608dccd3e0
 # Take the JSON output from Mykrobe, rearrange, output for LIMS
 # Adrian Zetner
 # August 2018
 # Libraries ####
-library(jsonlite, quietly = T)
-library(here, quietly = T)
+sink(stdout(), type = "message")
-suppressMessages(library(dplyr, quietly = T))
-suppressMessages(library(purrr, quietly = T))
+suppressPackageStartupMessages({
-library(tidyr, quietly = T)
+library(jsonlite)
-library(stringr, quietly = T)
+library(here)
-library(optparse, quietly = T)
+library(dplyr)
+library(purrr)
+library(tidyr)
+library(stringr)
+library(optparse)
+})
 # Define custom functions, variables, and paths. Collect and use CL arguments ####
 # Here's a function to recreate that output table from the input JSON files
 # Take that list and mash all the elements together as columns in a tibble, recycling as needed to fill in space
 # eg. phylo_group is repeated/recycled as many times as there are drugs tested
 as_tibble(temp)
 }
-sink(stdout(), type = "message")
-suppressPackageStartupMessages({
-library(jsonlite)
-library(here)
-library(dplyr)
-library(purrr)
-library(tidyr)
-library(stringr)
-library(optparse)
-})
 # Get command line arguments with optparse
 option_list = list(
 make_option(c("-f", "--file"),
 type="character",
 if (length(predictions.table) == 1){
 print(predictions.table)
 stop("No susceptibility results in files specified. Did the testing fail?", call.=FALSE)
 }
-# Variants
+# Variants, if present
-# Multiple resistance mutations and confidence per drug in the X_R_mutations column
+if (0 < predictions.table %>%
-# Actual protein changes in Mykrobe_X columns
+select(ends_with("_Prediction")) %>%
+unlist(use.names = F) %>%
-variants.temp <-
+str_count("[R,r]") %>%
-temp %>%
+sum()){
-select(file, drug, variants = `variants (gene:alt_depth:wt_depth:conf)`) %>%
-mutate(variants = replace(variants, variants == "", NA)) %>% # Make missing data consistent...
+# Multiple resistance mutations and confidence per drug in the X_R_mutations column
-filter(!is.na(variants)) %>% # ...Then get rid of it
+# Actual protein changes in Mykrobe_X columns
-mutate(tempcols = paste(drug, "R_mutations", sep = "_")) %>%
-mutate(R_mutations = variants) %>%
+variants.temp <-
-mutate(variants = strsplit(variants, "__")) %>% # Split the mutations across rows (list first then split across rows)
+temp %>%
-unnest(variants) %>%
+select(file, drug, variants = `variants (gene:alt_depth:wt_depth:conf)`) %>%
-separate(variants, c("gene", "mutation"), "_") %>%
+mutate(variants = replace(variants, variants == "", NA)) %>% # Make missing data consistent...
-mutate(columnname = ifelse(gene %in% c("tlyA", "rrs", "gid"), # Check for columns that include the drug name or not and paste accordingly
+filter(!is.na(variants)) %>% # ...Then get rid of it
-paste("Mykrobe", drug, gene, sep = "_"),
+mutate(tempcols = paste(drug, "R_mutations", sep = "_")) %>%
-paste("Mykrobe", gene, sep = "_"))) %>%
+mutate(R_mutations = variants) %>%
-# Extract out the mutation information with a regex that covers all potential genes
+mutate(variants = strsplit(variants, "__")) %>% # Split the mutations across rows (list first then split across rows)
-# This regex captures everything before the last hyphen that is followed by a colon
+unnest(variants) %>%
-mutate(mutation = str_match(mutation, "(.*)-.*:")[,2]) %>%
+separate(variants, c("gene", "mutation"), "_") %>%
-select(file, tempcols, R_mutations, columnname, mutation)
+mutate(columnname = ifelse(gene %in% c("tlyA", "rrs", "gid"), # Check for columns that include the drug name or not and paste accordingly
+paste("Mykrobe", drug, gene, sep = "_"),
-# Split each kind of variants into its own temp table then merge
+paste("Mykrobe", gene, sep = "_"))) %>%
-variants.1 <-
+# Extract out the mutation information with a regex that covers all potential genes
-variants.temp %>%
+# This regex captures everything before the last hyphen that is followed by a colon
-select(file, tempcols, R_mutations) %>%
+mutate(mutation = str_match(mutation, "(.*)-.*:")[,2]) %>%
-distinct() %>%
+select(file, tempcols, R_mutations, columnname, mutation)
-spread(tempcols, R_mutations)
+# Split each kind of variants into its own temp table then merge
-variants.2 <-
+variants.1 <-
 variants.temp %>%
-select(file, columnname, mutation) %>%
+select(file, tempcols, R_mutations) %>%
-group_by(file, columnname) %>%
+distinct() %>%
-summarise(mutation = paste(mutation, collapse = ";")) %>%
+spread(tempcols, R_mutations)
-spread(columnname, mutation)
+variants.2 <-
-variants.table <- full_join(variants.1, variants.2, by = "file")
+variants.temp %>%
+select(file, columnname, mutation) %>%
+group_by(file, columnname) %>%
+summarise(mutation = paste(mutation, collapse = ";")) %>%
+spread(columnname, mutation)
+variants.table <- full_join(variants.1, variants.2, by = "file")
+}else{
+variants.table <- data.frame(file=predictions.table$file, stringsAsFactors = F)
+}
 # Make a report ####
 report <-
 temp %>%
 phylo_group_depth,
 species_depth,
 lineage_depth) %>%
 distinct() %>%
 write.csv("output-jsondata.csv", row.names = F)
-print("Writing JSON data to CSV as output-jsondata.txt")
+print("Writing JSON data to CSV as output-jsondata.csv")
 sink(NULL, type="message") # close the sink
 quit()

Mercurial > repos > nml > mykrobe_parser

comparison mykrobe_parser.R @ 2:f2608dccd3e0 draft