## How to run tool
# $ Rscript my_r_tool.R input1.bam,input2.bam output.tsv
#
# Positional arguments (read via commandArgs(trailingOnly = TRUE)):
#   1. Comma-separated list of BAM files to count reads in.
#   2. Path of the tab-separated output table.

# Setup R error handling to go to stderr
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q(save = "no", status = 1, runLast = FALSE)
  }
)
# We need to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")


library("csaw")
library("stringr")
library("data.table")
library("getopt")
library("Rsamtools")


options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)

# Take in trailing command line arguments ----
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2L) {
  # Fail early with a readable message instead of an opaque strsplit() error
  # on a missing (NA) argument; the error handler above sends it to stderr.
  stop(
    "Expected two arguments: <comma-separated BAM files> <output file>",
    call. = FALSE
  )
}
inputs <- args[1]
output <- args[2]

print(output)
print(inputs)

# Separate multiple input files into a list of individual files
files <- unlist(strsplit(inputs, ","))

# Index bamfiles
indexBam(files = files)

# Create windows and count reads in them ----
# The bare Sys.time() calls are auto-printed under Rscript and bracket the
# counting step with timestamps in the log.
Sys.time()
windows <- windowCounts(files, spacing = 150, width = 200, bin = FALSE)
Sys.time()

# Keep only the first three columns of the window ranges
# (presumably seqnames, start, end for a GRanges coerced to a data frame).
df <- data.frame(rowRanges(windows), stringsAsFactors = FALSE)
df <- df[, 1:3]

file_names <- basename(data.frame(colData(windows))$bam.files)


# Final table with all windows and read counts ----
counts_table <- data.frame(
  df,
  assay(windows),
  stringsAsFactors = FALSE,
  check.names = FALSE
)
colnames(counts_table)[4:ncol(counts_table)] <- file_names


# Remove spaces in the table ----
# Trim leading/trailing whitespace from the *values* of text columns.
# Numeric columns are left untouched so their types and formatting are kept.
setDT(counts_table)
for (j in names(counts_table)) {
  column <- counts_table[[j]]
  if (is.character(column) || is.factor(column)) {
    set(counts_table, j = j, value = trimws(as.character(column)))
  }
}
# NOTE(review): data.frame() applies check.names = TRUE by default, so column
# names that are not syntactic R names (e.g. BAM names containing "-" or
# starting with a digit) are rewritten here. Kept as-is to preserve the
# existing output header; confirm whether that is intended.
table_sp <- data.frame(counts_table)

# Save final table ----
fwrite(
  x = table_sp,
  file = output,
  quote = FALSE,
  row.names = FALSE,
  sep = "\t"
)

# # Save individual files ----
# Sys.time()
# r <- paste(table_sp[,1], table_sp[,2], table_sp[,3], sep = "-")
# Sys.time()
# # r <- apply( table_sp[ ,c(1:3)] , 1 , paste , sep = "-" )
#
# dir <- paste(opt$outdir, "counts_each_sample", sep = "/")
# dir.create(dir)
#
# # cores <- detectCores()
# # cl <- makeCluster(cores)
# # registerDoParallel(cl)
#
# tab <- data.frame(regions = r, table_sp[,4:ncol(table_sp)], stringsAsFactors = F, check.names = F)
#
# # foreach(i = 2:ncol(tab)) %dopar% {
# for(i in 2:ncol(tab)){
#   print(i)
#   tmp <- data.frame(tab[,c(1,i)], stringsAsFactors = F, check.names = F)
#   n <- paste(dir, "/", colnames(tab)[i], ".txt", sep = "")
#   # write.table(tmp, xzfile(paste(dir, "/", n, ".txt.xz", sep = "")), sep = "\t", quote = F, row.names = F)
#   fwrite(x = tmp, file = n, quote = F, row.names = F, sep = "\t")
#   system(paste0("xz -3 -T 12 ", n))
# }
# # stopCluster(cl)

sessionInfo()