# Draws three summary bar plots from a comma-separated summary table and
# writes the plotted values next to each image as a tab-separated text file.
#
# Usage: Rscript <this script> <input.file> <plot1.path> <plot2.path> <plot3.path>
#   input.file  comma-separated table without header; the first column holds
#               the row names
#   plotN.path  output prefix for plot N; ".png" and ".txt" are appended

library(ggplot2)
library(reshape2)
library(scales)

args <- commandArgs(trailingOnly = TRUE)

# Fail early: a missing argument would otherwise become NA and the outputs
# would silently be written to files called "NA.png" / "NA.txt".
if (length(args) < 4) {
  stop("expected 4 arguments: <input.file> <plot1.path> <plot2.path> <plot3.path>",
       call. = FALSE)
}

# the data that gets turned into the "SHM overview" table in the html report
# ("data_sum.txt")
input.file <- args[1]

plot1.path <- args[2]
plot1.png <- paste0(plot1.path, ".png")
plot1.txt <- paste0(plot1.path, ".txt")

plot2.path <- args[3]
plot2.png <- paste0(plot2.path, ".png")
plot2.txt <- paste0(plot2.path, ".txt")

plot3.path <- args[4]
plot3.png <- paste0(plot3.path, ".png")
plot3.txt <- paste0(plot3.path, ".txt")

# fill = TRUE pads short lines with NA instead of aborting the read.
dat <- read.table(input.file, header = FALSE, sep = ",", quote = "",
                  stringsAsFactors = FALSE, fill = TRUE, row.names = 1)

# Every class contributes three columns named <class>.x, <class>.y and
# <class>.z, followed by the same triplet for "un" and "all".
# NOTE(review): the plots below use the .x columns as numerators of
# percentages and the .z columns directly as percentages -- confirm against
# the producer of the input file.
classes <- c("ca", "ca1", "ca2", "cg", "cg1", "cg2", "cg3", "cg4", "cm")
xyz <- c("x", "y", "z")
new.names <- c(paste(rep(classes, each = 3), xyz, sep = "."),
               paste("un", xyz, sep = "."),
               paste("all", xyz, sep = "."))

# A table that is too narrow cannot be labelled; report that explicitly.
# A wider table is still accepted.
if (ncol(dat) < length(new.names)) {
  stop("'", input.file, "' has ", ncol(dat), " data columns, expected at least ",
       length(new.names), call. = FALSE)
}

names(dat) <- new.names

# Plot 1: grouped bars per class for two combined rows of `dat`.
# Rows 13+14 and rows 15+16 are summed column-wise and appended to `dat`
# under the names "RGYW.WRCY" and "TW.WA".
# NOTE(review): presumably rows 13-16 are the RGYW, WRCY, TW and WA rows of
# the input table -- confirm against the input file.
dat["RGYW.WRCY", ] <- colSums(dat[c(13, 14), ])
dat["TW.WA", ] <- colSums(dat[c(15, 16), ])

data1 <- dat[c("RGYW.WRCY", "TW.WA"), ]

# Keep the ".z" column of every class and strip the suffix so only the class
# name remains. The dot is escaped and the pattern anchored so that only a
# literal ".z" suffix matches (an unescaped "." matches any character).
data1 <- data1[, grepl("\\.z$", names(data1)), drop = FALSE]
names(data1) <- sub("\\.z$", "", names(data1))

# Long format: one row per (class, motif pair) combination.
data1 <- melt(t(data1))
names(data1) <- c("Class", "Type", "value")

write.table(data1, plot1.txt, quote = FALSE, sep = "\t", na = "",
            row.names = FALSE, col.names = TRUE)

p <- ggplot(data1, aes(Class, value)) +
  geom_bar(aes(fill = Type), stat = "identity", position = "dodge") +
  ylab("% of mutations") +
  guides(fill = guide_legend(title = NULL))
png(filename = plot1.png)
print(p)
dev.off()

# Plot 2: stacked bars per class showing how rows 5-8 of `dat` split into
# "A/T", "G/C transversions" and "G/C transitions".
# NOTE(review): presumably row 5 = G/C transitions, row 6 = G/C transversions
# and rows 7-8 = the A/T mutations -- confirm against the input file.
data2 <- dat[5:8, ]
data2["sum", ] <- colSums(data2)

# Keep the ".x" column of every class and strip the suffix; the pattern is an
# escaped, anchored literal so it cannot match inside a class name.
data2 <- data2[, grepl("\\.x$", names(data2)), drop = FALSE]
names(data2) <- sub("\\.x$", "", names(data2))

# Each category as a percentage of the column total, rounded to one decimal.
data2["A/T", ] <- round(colSums(data2[3:4, ]) / data2["sum", ] * 100, 1)
data2["G/C transversions", ] <- round(data2[2, ] / data2["sum", ] * 100, 1)
data2["G/C transitions", ] <- round(data2[1, ] / data2["sum", ] * 100, 1)

# Classes as rows, the three categories as columns (same order as they were
# added above, which also fixes the legend order).
pct2 <- t(data2[c("A/T", "G/C transversions", "G/C transitions"), ])

# A zero total yields NaN (0/0) or Inf (n/0); plot those as 0. All three
# categories are treated alike. NA values are left untouched.
pct2[is.nan(pct2) | is.infinite(pct2)] <- 0

data2 <- melt(pct2)
names(data2) <- c("Class", "Type", "value")

write.table(data2, plot2.txt, quote = FALSE, sep = "\t", na = "",
            row.names = FALSE, col.names = TRUE)

# position = "fill" rescales every bar to a total of 1, so the y axis can be
# labelled with percent_format().
p <- ggplot(data2, aes(x = Class, y = value, fill = Type)) +
  geom_bar(position = "fill", stat = "identity") +
  scale_y_continuous(labels = percent_format()) +
  guides(fill = guide_legend(title = NULL)) +
  ylab("% of mutations")
png(filename = plot2.png)
print(p)
dev.off()

# Plot 3: grouped bars per class with rows 5, 6 and 8 of `dat` expressed as a
# percentage of sums taken from rows 17-20.
# NOTE(review): presumably rows 17-20 hold per-nucleotide counts, with rows
# 18 and 20 being the G/C counts and rows 17 and 19 the A/T counts -- confirm
# against the input file.
# NOTE(review): plot 2 adds rows 7 and 8 for "A/T" while only row 8 is used
# here -- confirm that this difference is intended.
data3 <- dat[c(5, 6, 8, 17:20), ]

# Keep the ".x" column of every class and strip the suffix; the pattern is an
# escaped, anchored literal so it cannot match inside a class name.
data3 <- data3[, grepl("\\.x$", names(data3)), drop = FALSE]
names(data3) <- sub("\\.x$", "", names(data3))

# Row positions below refer to the subset above:
# 1 = dat row 5, 2 = row 6, 3 = row 8, 4:7 = rows 17:20.
data3["G/C transitions", ] <- round(data3[1, ] / (data3[5, ] + data3[7, ]) * 100, 1)
data3["G/C transversions", ] <- round(data3[2, ] / (data3[5, ] + data3[7, ]) * 100, 1)
data3["A/T", ] <- round(data3[3, ] / (data3[4, ] + data3[6, ]) * 100, 1)

# Classes as rows, the three categories as columns (same order as they were
# added above, which also fixes the legend order).
pct3 <- t(data3[c("G/C transitions", "G/C transversions", "A/T"), ])

# A zero denominator yields NaN (0/0) or Inf (n/0); plot those as 0.
# NA values are left untouched.
pct3[is.nan(pct3) | is.infinite(pct3)] <- 0

data3 <- melt(pct3)
names(data3) <- c("Class", "Type", "value")

write.table(data3, plot3.txt, quote = FALSE, sep = "\t", na = "",
            row.names = FALSE, col.names = TRUE)

p <- ggplot(data3, aes(Class, value)) +
  geom_bar(aes(fill = Type), stat = "identity", position = "dodge") +
  ylab("% of nucleotides") +
  guides(fill = guide_legend(title = NULL))
png(filename = plot3.png)
print(p)
dev.off()