Mercurial > repos > dktanwar > test_csaw_1
view csaw.R @ 5:aa29b20bbb45 draft
Uploaded
author | dktanwar |
---|---|
date | Mon, 18 Dec 2017 12:05:05 -0500 |
parents | ce3ad612a104 |
children | ee07a679ac08 |
line wrap: on
line source
## How to run tool # $ Rscript my_r_tool.R # --input1 input1.csv # --input2 input2.csv # --output1 output.csv # --output2 output2.csv # Setup R error handling to go to stderr options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)}) # We need to not crash galaxy with an UTF8 error on German LC settings. loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") library("csaw") library("stringr") library("data.table") library("getopt") library("Rsamtools") options(stringAsfactors = FALSE, useFancyQuotes = FALSE) # Take in trailing command line arguments output <- commandArgs(trailingOnly=TRUE)[2] inputs <- commandArgs(trailingOnly=TRUE)[1] print(output) print(inputs) # Separate multiple input files into a list of individual files files <- unlist(strsplit(inputs, ',')) # Index bamfiles indexBam(files = files) # Create windows and count reads in them ---- Sys.time() windows <- windowCounts(files, spacing=150, width=200, bin=F) Sys.time() df <- data.frame(rowRanges(windows), stringsAsFactors = F) df <- df[,c(1:3)] file_names <- basename(data.frame(colData(windows))$bam.files) # Final table with all windows and read counts ---- table <- data.frame(df, assay(windows), stringsAsFactors = F, check.names = F) colnames(table)[4:ncol(table)] <- file_names # Remove spaces in the table ---- setDT(table) for (j in names(table)) set(table, j = j, value = table[[trimws(j)]]) # Save final table ---- # fwrite(x = table_sp, file = output, quote = F, row.names = F, sep = "\t") dt <- table[,regions:=paste0(seqnames,"-", start, "-", end)] table_sp <- data.frame(dt) for(i in 4:(ncol(table_sp)-1)){ tmp <- table_sp[,c(ncol(table_sp), i)] fwrite(x = tmp, file = output, quote = F, row.names = F, sep = "\t") } # # Save individual files ---- # Sys.time() # r <- paste(table_sp[,1], table_sp[,2], table_sp[,3], sep = "-") # Sys.time() # # r <- apply( table_sp[ ,c(1:3)] , 1 , paste , sep = "-" ) # # dir <- paste(opt$outdir, "counts_each_sample", sep = "/") # dir.create(dir) # # # cores <- detectCores() # # cl <- makeCluster(cores) # # registerDoParallel(cl) # # tab <- data.frame(regions = r, table_sp[,4:ncol(table_sp)], stringsAsFactors = F, check.names = F) # # # foreach(i = 2:ncol(tab)) %dopar% { # for(i in 2:ncol(tab)){ # print(i) # tmp <- data.frame(tab[,c(1,i)], stringsAsFactors = F, check.names = F) # n <- paste(dir, "/", colnames(tab)[i], ".txt", sep = "") # # write.table(tmp, xzfile(paste(dir, "/", n, ".txt.xz", sep = "")), sep = "\t", quote = F, row.names = F) # fwrite(x = tmp, file = n, quote = F, row.names = F, sep = "\t") # system(paste0("xz -3 -T 12 ", n)) # } # # stopCluster(cl) sessionInfo()