0
+ − 1 library(ggplot2)
+ − 2 library(reshape2)
+ − 3 library(scales)
+ − 4
+ − 5 args <- commandArgs(trailingOnly = TRUE)
+ − 6
+ − 7 input.file = args[1] #the data that's get turned into the "SHM overview" table in the html report "data_sum.txt"
+ − 8
+ − 9 plot1.path = args[2]
+ − 10 plot1.png = paste(plot1.path, ".png", sep="")
+ − 11 plot1.txt = paste(plot1.path, ".txt", sep="")
42
+ − 12 plot1.pdf = paste(plot1.path, ".pdf", sep="")
0
+ − 13
+ − 14 plot2.path = args[3]
+ − 15 plot2.png = paste(plot2.path, ".png", sep="")
+ − 16 plot2.txt = paste(plot2.path, ".txt", sep="")
42
+ − 17 plot2.pdf = paste(plot2.path, ".pdf", sep="")
0
+ − 18
+ − 19 plot3.path = args[4]
+ − 20 plot3.png = paste(plot3.path, ".png", sep="")
+ − 21 plot3.txt = paste(plot3.path, ".txt", sep="")
42
+ − 22 plot3.pdf = paste(plot3.path, ".pdf", sep="")
0
+ − 23
23
+ − 24 clean.output = args[5]
+ − 25
0
+ − 26 dat = read.table(input.file, header=F, sep=",", quote="", stringsAsFactors=F, fill=T, row.names=1)
+ − 27
5
+ − 28 classes = c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE")
0
+ − 29 xyz = c("x", "y", "z")
+ − 30 new.names = c(paste(rep(classes, each=3), xyz, sep="."), paste("un", xyz, sep="."), paste("all", xyz, sep="."))
+ − 31
+ − 32 names(dat) = new.names
+ − 33
23
+ − 34 clean.dat = dat
+ − 35 clean.dat = clean.dat[,c(paste(rep(classes, each=3), xyz, sep="."), paste("all", xyz, sep="."), paste("un", xyz, sep="."))]
+ − 36
+ − 37 write.table(clean.dat, clean.output, quote=F, sep="\t", na="", row.names=T, col.names=NA)
+ − 38
0
+ − 39 dat["RGYW.WRCY",] = colSums(dat[c(13,14),], na.rm=T)
+ − 40 dat["TW.WA",] = colSums(dat[c(15,16),], na.rm=T)
+ − 41
+ − 42 data1 = dat[c("RGYW.WRCY", "TW.WA"),]
+ − 43
+ − 44 data1 = data1[,names(data1)[grepl(".z", names(data1))]]
+ − 45 names(data1) = gsub("\\..*", "", names(data1))
+ − 46
+ − 47 data1 = melt(t(data1))
+ − 48
+ − 49 names(data1) = c("Class", "Type", "value")
+ − 50
38
+ − 51 chk = is.na(data1$value)
+ − 52 if(any(chk)){
+ − 53 data1[chk, "value"] = 0
+ − 54 }
+ − 55
0
+ − 56 data1 = data1[order(data1$Type),]
+ − 57
+ − 58 write.table(data1, plot1.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
+ − 59
39
+ − 60 p = ggplot(data1, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of mutations") + guides(fill=guide_legend(title=NULL)) + ggtitle("Percentage of mutations in AID and pol eta motives")
5
+ − 61 p = p + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("RGYW.WRCY" = "white", "TW.WA" = "blue4"))
0
+ − 62 #p = p + scale_colour_manual(values=c("RGYW.WRCY" = "black", "TW.WA" = "blue4"))
39
+ − 63 png(filename=plot1.png, width=510, height=300)
0
+ − 64 print(p)
+ − 65 dev.off()
+ − 66
42
+ − 67 ggsave(plot1.pdf, p)
+ − 68
23
+ − 69 data2 = dat[c(1, 5:8),]
0
+ − 70
+ − 71 data2 = data2[,names(data2)[grepl("\\.x", names(data2))]]
+ − 72 names(data2) = gsub(".x", "", names(data2))
+ − 73
23
+ − 74 data2["A/T",] = dat["Targeting of A T (%)",names(dat)[grepl("\\.z", names(dat))]]
0
+ − 75
23
+ − 76 data2["G/C transitions",] = round(data2["Transitions at G C (%)",] / data2["Number of Mutations (%)",] * 100, 1)
0
+ − 77
23
+ − 78 data2["mutation.at.gc",] = dat["Transitions at G C (%)",names(dat)[grepl("\\.y", names(dat))]]
+ − 79 data2["G/C transversions",] = round((data2["mutation.at.gc",] - data2["Transitions at G C (%)",]) / data2["Number of Mutations (%)",] * 100, 1)
0
+ − 80
+ − 81 data2["G/C transversions",is.nan(unlist(data2["G/C transversions",]))] = 0
+ − 82 data2["G/C transversions",is.infinite(unlist(data2["G/C transversions",]))] = 0
+ − 83 data2["G/C transitions",is.nan(unlist(data2["G/C transitions",]))] = 0
+ − 84 data2["G/C transitions",is.infinite(unlist(data2["G/C transitions",]))] = 0
+ − 85
23
+ − 86 data2 = melt(t(data2[c("A/T","G/C transitions","G/C transversions"),]))
0
+ − 87
+ − 88 names(data2) = c("Class", "Type", "value")
+ − 89
38
+ − 90 chk = is.na(data2$value)
+ − 91 if(any(chk)){
+ − 92 data2[chk, "value"] = 0
+ − 93 }
+ − 94
0
+ − 95 data2 = data2[order(data2$Type),]
+ − 96
+ − 97 write.table(data2, plot2.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
+ − 98
39
+ − 99 p = ggplot(data2, aes(x=Class, y=value, fill=Type)) + geom_bar(position="fill", stat="identity", colour = "black") + scale_y_continuous(labels=percent_format()) + guides(fill=guide_legend(title=NULL)) + ylab("% of mutations") + ggtitle("Relative mutation patterns")
5
+ − 100 p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
0
+ − 101 #p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black"))
+ − 102 png(filename=plot2.png, width=480, height=300)
+ − 103 print(p)
+ − 104 dev.off()
+ − 105
42
+ − 106 ggsave(plot2.pdf, p)
+ − 107
0
+ − 108 data3 = dat[c(5, 6, 8, 17:20),]
+ − 109 data3 = data3[,names(data3)[grepl("\\.x", names(data3))]]
+ − 110 names(data3) = gsub(".x", "", names(data3))
+ − 111
23
+ − 112 data3["G/C transitions",] = round(data3["Transitions at G C (%)",] / (data3["C",] + data3["G",]) * 100, 1)
0
+ − 113
23
+ − 114 data3["G/C transversions",] = round((data3["Targeting of G C (%)",] - data3["Transitions at G C (%)",]) / (data3["C",] + data3["G",]) * 100, 1)
0
+ − 115
23
+ − 116 data3["A/T",] = round(data3["Targeting of A T (%)",] / (data3["A",] + data3["T",]) * 100, 1)
0
+ − 117
+ − 118 data3["G/C transitions",is.nan(unlist(data3["G/C transitions",]))] = 0
+ − 119 data3["G/C transitions",is.infinite(unlist(data3["G/C transitions",]))] = 0
+ − 120
+ − 121 data3["G/C transversions",is.nan(unlist(data3["G/C transversions",]))] = 0
+ − 122 data3["G/C transversions",is.infinite(unlist(data3["G/C transversions",]))] = 0
+ − 123
+ − 124 data3["A/T",is.nan(unlist(data3["A/T",]))] = 0
+ − 125 data3["A/T",is.infinite(unlist(data3["A/T",]))] = 0
+ − 126
+ − 127 data3 = melt(t(data3[8:10,]))
+ − 128 names(data3) = c("Class", "Type", "value")
+ − 129
39
+ − 130 chk = is.na(data3$value)
38
+ − 131 if(any(chk)){
+ − 132 data3[chk, "value"] = 0
+ − 133 }
+ − 134
0
+ − 135 data3 = data3[order(data3$Type),]
+ − 136
+ − 137 write.table(data3, plot3.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
+ − 138
39
+ − 139 p = ggplot(data3, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of nucleotides") + guides(fill=guide_legend(title=NULL)) + ggtitle("Absolute mutation patterns")
5
+ − 140 p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
0
+ − 141 #p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black"))
+ − 142 png(filename=plot3.png, width=480, height=300)
+ − 143 print(p)
+ − 144 dev.off()
+ − 145
42
+ − 146 ggsave(plot3.pdf, p)
0
+ − 147
+ − 148
+ − 149
+ − 150
+ − 151
+ − 152
+ − 153
+ − 154
+ − 155
+ − 156
+ − 157
+ − 158
+ − 159
+ − 160
+ − 161
+ − 162
+ − 163
+ − 164
+ − 165
+ − 166
+ − 167
+ − 168
+ − 169
+ − 170
+ − 171
+ − 172
+ − 173
+ − 174
+ − 175
+ − 176
+ − 177
42
+ − 178