diff pattern_plots.r @ 23:81453585dfc3 draft

Uploaded
author davidvanzessen
date Thu, 01 Dec 2016 09:32:06 -0500
parents 012a738edf5a
children 05c62efdc393
line wrap: on
line diff
--- a/pattern_plots.r	Mon Nov 28 10:27:22 2016 -0500
+++ b/pattern_plots.r	Thu Dec 01 09:32:06 2016 -0500
@@ -18,6 +18,8 @@
 plot3.png = paste(plot3.path, ".png", sep="")
 plot3.txt = paste(plot3.path, ".txt", sep="")
 
+clean.output = args[5]
+
 dat = read.table(input.file, header=F, sep=",", quote="", stringsAsFactors=F, fill=T, row.names=1)
 
 
@@ -28,6 +30,11 @@
 
 names(dat) = new.names
 
+clean.dat = dat
+clean.dat = clean.dat[,c(paste(rep(classes, each=3), xyz, sep="."), paste("all", xyz, sep="."), paste("un", xyz, sep="."))]
+
+write.table(clean.dat, clean.output, quote=F, sep="\t", na="", row.names=T, col.names=NA)
+
 dat["RGYW.WRCY",] = colSums(dat[c(13,14),], na.rm=T)
 dat["TW.WA",] = colSums(dat[c(15,16),], na.rm=T)
 
@@ -51,26 +58,24 @@
 print(p)
 dev.off()
 
-data2 = dat[5:8,]
-
-data2["sum",] = colSums(data2, na.rm=T)
+data2 = dat[c(1, 5:8),]
 
 data2 = data2[,names(data2)[grepl("\\.x", names(data2))]]
 names(data2) = gsub(".x", "", names(data2))
 
-data2["A/T",] = round(colSums(data2[3:4,]) / data2["sum",] * 100, 1)
-data2["A/T",is.nan(unlist(data2["A/T",]))] = 0
+data2["A/T",] = dat["Targeting of A T (%)",names(dat)[grepl("\\.z", names(dat))]]
 
-data2["G/C transversions",] = round(data2[2,] / data2["sum",] * 100, 1)
-data2["G/C transitions",] = round(data2[1,] / data2["sum",] * 100, 1)
+data2["G/C transitions",] = round(data2["Transitions at G C (%)",] / data2["Number of Mutations (%)",] * 100, 1)
 
+data2["mutation.at.gc",] = dat["Transitions at G C (%)",names(dat)[grepl("\\.y", names(dat))]]
+data2["G/C transversions",] = round((data2["mutation.at.gc",] - data2["Transitions at G C (%)",]) / data2["Number of Mutations (%)",] * 100, 1)
 
 data2["G/C transversions",is.nan(unlist(data2["G/C transversions",]))] = 0
 data2["G/C transversions",is.infinite(unlist(data2["G/C transversions",]))] = 0
 data2["G/C transitions",is.nan(unlist(data2["G/C transitions",]))] = 0
 data2["G/C transitions",is.infinite(unlist(data2["G/C transitions",]))] = 0
 
-data2 = melt(t(data2[6:8,]))
+data2 = melt(t(data2[c("A/T","G/C transitions","G/C transversions"),]))
 
 names(data2) = c("Class", "Type", "value")
 
@@ -92,11 +97,11 @@
 data3[is.na(data3)] = 0
 #data3[is.infinite(data3)] = 0
 
-data3["G/C transitions",] = round(data3[1,] / (data3[5,] + data3[7,]) * 100, 1)
+data3["G/C transitions",] = round(data3["Transitions at G C (%)",] / (data3["C",] + data3["G",]) * 100, 1)
 
-data3["G/C transversions",] = round(data3[2,] / (data3[5,] + data3[7,]) * 100, 1)
+data3["G/C transversions",] = round((data3["Targeting of G C (%)",] - data3["Transitions at G C (%)",]) / (data3["C",] + data3["G",]) * 100, 1)
 
-data3["A/T",] = round(data3[3,] / (data3[4,] + data3[6,]) * 100, 1)
+data3["A/T",] = round(data3["Targeting of A T (%)",] / (data3["A",] + data3["T",]) * 100, 1)
 
 data3["G/C transitions",is.nan(unlist(data3["G/C transitions",]))] = 0
 data3["G/C transitions",is.infinite(unlist(data3["G/C transitions",]))] = 0