Mercurial > repos > davidvanzessen > mutation_analysis
diff pattern_plots.r @ 0:8a5a2abbb870 draft default tip
Uploaded
| author | davidvanzessen | 
|---|---|
| date | Mon, 29 Aug 2016 05:36:10 -0400 | 
| parents | |
| children | |
line wrap: on
 line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pattern_plots.r Mon Aug 29 05:36:10 2016 -0400 @@ -0,0 +1,139 @@
#
# pattern_plots.r
#
# Reads a comma-separated summary table and produces three bar charts, each
# written as a PNG plus a tab-separated text file holding the plotted data.
#
# Usage:
#   Rscript pattern_plots.r <input.file> <plot1.path> <plot2.path> <plot3.path>
#
# Each <plotN.path> is a path prefix; ".png" and ".txt" are appended to it.

library(ggplot2)
library(reshape2)
library(scales)

# Helpers ----

# Set NaN and +/-Inf entries (results of dividing by zero) in one row of a
# data frame to 0. NA entries are left untouched.
zero_non_finite <- function(df, row) {
  values <- unlist(df[row, ])
  df[row, is.nan(values) | is.infinite(values)] <- 0
  df
}

# Transpose the selected rows and melt them into long format with the columns
# Class (former column name), Type (former row name) and value.
melt_rows <- function(df, rows) {
  long <- melt(t(df[rows, ]))
  names(long) <- c("Class", "Type", "value")
  long
}

# Write the plotted data as a tab-separated file with a header line.
write_plot_data <- function(df, path) {
  write.table(df, path, quote = FALSE, sep = "\t", na = "",
              row.names = FALSE, col.names = TRUE)
}

# Render a ggplot object to a PNG file; the device is closed even if
# printing the plot fails.
save_png <- function(plot, path) {
  png(filename = path)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(NULL)
}

# Arguments ----

args <- commandArgs(trailingOnly = TRUE)

# Without this check missing arguments become NA and the script would
# silently write to files named "NA.png" / "NA.txt".
if (length(args) < 4) {
  stop("Usage: Rscript pattern_plots.r <input.file> <plot1.path> ",
       "<plot2.path> <plot3.path>", call. = FALSE)
}

# The data that gets turned into the "SHM overview" table in the HTML report
# ("data_sum.txt").
input.file <- args[1]

plot1.path <- args[2]
plot1.png <- paste0(plot1.path, ".png")
plot1.txt <- paste0(plot1.path, ".txt")

plot2.path <- args[3]
plot2.png <- paste0(plot2.path, ".png")
plot2.txt <- paste0(plot2.path, ".txt")

plot3.path <- args[4]
plot3.png <- paste0(plot3.path, ".png")
plot3.txt <- paste0(plot3.path, ".txt")

# Input ----

# The first column of the file supplies the row names; there is no header row.
dat <- read.table(input.file, header = FALSE, sep = ",", quote = "",
                  stringsAsFactors = FALSE, fill = TRUE, row.names = 1)

# Every class contributes three columns, suffixed .x, .y and .z, followed by
# the same triple for "un" and "all" (33 columns in total).
# NOTE(review): the meaning of x/y/z is not visible in this script; plot 1
# uses the .z columns, plots 2 and 3 use the .x columns - confirm against the
# code that writes the input file.
classes <- c("ca", "ca1", "ca2", "cg", "cg1", "cg2", "cg3", "cg4", "cm")
xyz <- c("x", "y", "z")
new.names <- c(
  paste(rep(classes, each = 3), xyz, sep = "."),
  paste("un", xyz, sep = "."),
  paste("all", xyz, sep = ".")
)

names(dat) <- new.names

# Plot 1 ----

# Rows are addressed by position throughout this script.
# NOTE(review): presumably rows 13/14 hold RGYW and WRCY and rows 15/16 hold
# TW and WA - confirm against the layout of the input file.
dat["RGYW.WRCY", ] <- colSums(dat[c(13, 14), ])
dat["TW.WA", ] <- colSums(dat[c(15, 16), ])

data1 <- dat[c("RGYW.WRCY", "TW.WA"), ]

# Keep only the ".z" columns. The dot is escaped and the pattern anchored so
# that only a literal ".z" suffix matches.
data1 <- data1[, grepl("\\.z$", names(data1)), drop = FALSE]
names(data1) <- sub("\\..*", "", names(data1))

data1 <- melt_rows(data1, c("RGYW.WRCY", "TW.WA"))

write_plot_data(data1, plot1.txt)

p <- ggplot(data1, aes(Class, value)) +
  geom_bar(aes(fill = Type), stat = "identity", position = "dodge") +
  ylab("% of mutations") +
  guides(fill = guide_legend(title = NULL))
save_png(p, plot1.png)

# Plot 2 ----

# NOTE(review): judging by the derived row labels below, row 5 is G/C
# transitions, row 6 is G/C transversions and rows 7-8 are A/T - confirm.
data2 <- dat[5:8, ]

data2["sum", ] <- colSums(data2)

data2 <- data2[, grepl("\\.x$", names(data2)), drop = FALSE]
names(data2) <- sub("\\.x$", "", names(data2))

# Express each category as a percentage of the column total, one decimal.
data2["A/T", ] <- round(colSums(data2[3:4, ]) / data2["sum", ] * 100, 1)
data2["G/C transversions", ] <- round(data2[2, ] / data2["sum", ] * 100, 1)
data2["G/C transitions", ] <- round(data2[1, ] / data2["sum", ] * 100, 1)

# A zero column total yields NaN/Inf; plot those as 0.
plot2.rows <- c("A/T", "G/C transversions", "G/C transitions")
for (row in plot2.rows) {
  data2 <- zero_non_finite(data2, row)
}

data2 <- melt_rows(data2, plot2.rows)

write_plot_data(data2, plot2.txt)

# position = "fill" stacks the three types to 100% within each class.
p <- ggplot(data2, aes(x = Class, y = value, fill = Type)) +
  geom_bar(position = "fill", stat = "identity") +
  scale_y_continuous(labels = percent_format()) +
  guides(fill = guide_legend(title = NULL)) +
  ylab("% of mutations")
save_png(p, plot2.png)

# Plot 3 ----

# After subsetting, data3 rows 1-3 are input rows 5, 6 and 8 and data3 rows
# 4-7 are input rows 17-20, which serve as denominators below.
# NOTE(review): rows 17-20 are presumably per-nucleotide counts (the axis is
# labelled "% of nucleotides") - confirm. Also note that A/T here uses input
# row 8 only, whereas plot 2 sums rows 7 and 8 - verify this is intended.
data3 <- dat[c(5, 6, 8, 17:20), ]
data3 <- data3[, grepl("\\.x$", names(data3)), drop = FALSE]
names(data3) <- sub("\\.x$", "", names(data3))

data3["G/C transitions", ] <-
  round(data3[1, ] / (data3[5, ] + data3[7, ]) * 100, 1)
data3["G/C transversions", ] <-
  round(data3[2, ] / (data3[5, ] + data3[7, ]) * 100, 1)
data3["A/T", ] <-
  round(data3[3, ] / (data3[4, ] + data3[6, ]) * 100, 1)

# A zero denominator yields NaN/Inf; plot those as 0.
plot3.rows <- c("G/C transitions", "G/C transversions", "A/T")
for (row in plot3.rows) {
  data3 <- zero_non_finite(data3, row)
}

data3 <- melt_rows(data3, plot3.rows)

write_plot_data(data3, plot3.txt)

p <- ggplot(data3, aes(Class, value)) +
  geom_bar(aes(fill = Type), stat = "identity", position = "dodge") +
  ylab("% of nucleotides") +
  guides(fill = guide_legend(title = NULL))
save_png(p, plot3.png)
