Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 32:7c33029fd63d draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Tue, 28 Mar 2017 05:40:04 -0400 |
| parents | b50965edac24 |
| children | f37e072affc0 |
comparison
equal
deleted
inserted
replaced
| 31:3a76faa53c59 | 32:7c33029fd63d |
|---|---|
| 300 pV = pV + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 300 pV = pV + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
| 301 write.table(x=PRODFV, file="VFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) | 301 write.table(x=PRODFV, file="VFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 302 | 302 |
| 303 png("VPlot.png",width = 1280, height = 720) | 303 png("VPlot.png",width = 1280, height = 720) |
| 304 pV | 304 pV |
| 305 dev.off(); | 305 dev.off() |
| 306 | |
| 307 ggsave("VPlot.pdf", pV, width=13, height=7) | |
| 306 | 308 |
| 307 if(useD){ | 309 if(useD){ |
| 308 pD = ggplot(PRODFD) | 310 pD = ggplot(PRODFD) |
| 309 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 311 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
| 310 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + scale_fill_manual(values=sample.colors) | 312 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + scale_fill_manual(values=sample.colors) |
| 311 pD = pD + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 313 pD = pD + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
| 312 write.table(x=PRODFD, file="DFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) | 314 write.table(x=PRODFD, file="DFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 313 | 315 |
| 314 png("DPlot.png",width = 800, height = 600) | 316 png("DPlot.png",width = 800, height = 600) |
| 315 print(pD) | 317 print(pD) |
| 316 dev.off(); | 318 dev.off() |
| 319 | |
| 320 ggsave("DPlot.pdf", pD, width=10, height=7) | |
| 317 } | 321 } |
| 318 | 322 |
| 319 pJ = ggplot(PRODFJ) | 323 pJ = ggplot(PRODFJ) |
| 320 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 324 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
| 321 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + scale_fill_manual(values=sample.colors) | 325 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + scale_fill_manual(values=sample.colors) |
| 322 pJ = pJ + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 326 pJ = pJ + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
| 323 write.table(x=PRODFJ, file="JFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) | 327 write.table(x=PRODFJ, file="JFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 324 | 328 |
| 325 png("JPlot.png",width = 800, height = 600) | 329 png("JPlot.png",width = 800, height = 600) |
| 326 pJ | 330 pJ |
| 327 dev.off(); | 331 dev.off() |
| 332 | |
| 333 ggsave("JPlot.pdf", pJ) | |
| 328 | 334 |
| 329 # ---------------------- Now the frequency plots of the V, D and J families ---------------------- | 335 # ---------------------- Now the frequency plots of the V, D and J families ---------------------- |
| 330 | 336 |
| 331 print("Report Clonality - V, D and J family plots") | 337 print("Report Clonality - V, D and J family plots") |
| 332 | 338 |
| 342 ylab("Percentage of sequences") + | 348 ylab("Percentage of sequences") + |
| 343 scale_fill_manual(values=sample.colors) + | 349 scale_fill_manual(values=sample.colors) + |
| 344 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 350 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
| 345 png("VFPlot.png") | 351 png("VFPlot.png") |
| 346 VPlot | 352 VPlot |
| 347 dev.off(); | 353 dev.off() |
| 354 ggsave("VFPlot.pdf", VPlot) | |
| 355 | |
| 348 write.table(x=VGenes, file="VFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) | 356 write.table(x=VGenes, file="VFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 349 | 357 |
| 350 if(useD){ | 358 if(useD){ |
| 351 DGenes = PRODF[,c("Sample", "Top.D.Gene")] | 359 DGenes = PRODF[,c("Sample", "Top.D.Gene")] |
| 352 DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene) | 360 DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene) |
| 360 ylab("Percentage of sequences") + | 368 ylab("Percentage of sequences") + |
| 361 scale_fill_manual(values=sample.colors) + | 369 scale_fill_manual(values=sample.colors) + |
| 362 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 370 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
| 363 png("DFPlot.png") | 371 png("DFPlot.png") |
| 364 print(DPlot) | 372 print(DPlot) |
| 365 dev.off(); | 373 dev.off() |
| 374 | |
| 375 ggsave("DFPlot.pdf", DPlot) | |
| 366 write.table(x=DGenes, file="DFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) | 376 write.table(x=DGenes, file="DFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 367 } | 377 } |
| 368 | 378 |
| 369 # ---------------------- Plotting the cdr3 length ---------------------- | 379 # ---------------------- Plotting the cdr3 length ---------------------- |
| 370 | 380 |
| 382 scale_fill_manual(values=sample.colors) + | 392 scale_fill_manual(values=sample.colors) + |
| 383 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 393 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
| 384 png("CDR3LengthPlot.png",width = 1280, height = 720) | 394 png("CDR3LengthPlot.png",width = 1280, height = 720) |
| 385 CDR3LengthPlot | 395 CDR3LengthPlot |
| 386 dev.off() | 396 dev.off() |
| 397 | |
| 398 ggsave("CDR3LengthPlot.pdf", CDR3LengthPlot, width=12, height=7) | |
| 399 | |
| 387 write.table(x=CDR3Length, file="CDR3LengthPlot.txt", sep="\t",quote=F,row.names=F,col.names=T) | 400 write.table(x=CDR3Length, file="CDR3LengthPlot.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 388 | 401 |
| 389 # ---------------------- Plot the heatmaps ---------------------- | 402 # ---------------------- Plot the heatmaps ---------------------- |
| 390 | 403 |
| 391 #get the reverse order for the V and D genes | 404 #get the reverse order for the V and D genes |
| 411 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) | 424 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) |
| 412 | 425 |
| 413 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) | 426 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) |
| 414 print(img) | 427 print(img) |
| 415 dev.off() | 428 dev.off() |
| 429 | |
| 430 ggsave(paste("HeatmapVD_", unique(dat[3])[1,1] , ".pdf", sep=""), img, height=13, width=8) | |
| 431 | |
| 416 write.table(x=acast(dat, Top.V.Gene~Top.D.Gene, value.var="Length"), file=paste("HeatmapVD_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) | 432 write.table(x=acast(dat, Top.V.Gene~Top.D.Gene, value.var="Length"), file=paste("HeatmapVD_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) |
| 417 } | 433 } |
| 418 | 434 |
| 419 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) | 435 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) |
| 420 | 436 |
| 461 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) | 477 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) |
| 462 | 478 |
| 463 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) | 479 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) |
| 464 print(img) | 480 print(img) |
| 465 dev.off() | 481 dev.off() |
| 482 | |
| 483 ggsave(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".pdf", sep=""), img, height=11, width=4) | |
| 484 | |
| 466 write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) | 485 write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) |
| 467 } | 486 } |
| 468 | 487 |
| 469 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) | 488 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) |
| 470 | 489 |
| 510 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) | 529 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) |
| 511 | 530 |
| 512 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) | 531 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) |
| 513 print(img) | 532 print(img) |
| 514 dev.off() | 533 dev.off() |
| 534 | |
| 535 ggsave(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".pdf", sep=""), img, width=4, height=7) | |
| 536 | |
| 515 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) | 537 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) |
| 516 } | 538 } |
| 517 | 539 |
| 518 | 540 |
| 519 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) | 541 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) |
| 901 | 923 |
| 902 png("DReadingFrame.png") | 924 png("DReadingFrame.png") |
| 903 D.REGION.reading.frame | 925 D.REGION.reading.frame |
| 904 dev.off() | 926 dev.off() |
| 905 | 927 |
| 906 | 928 ggsave("DReadingFrame.pdf", D.REGION.reading.frame) |
| 907 | 929 |
| 908 | 930 |
| 909 # ---------------------- AA composition in CDR3 ---------------------- | 931 # ---------------------- AA composition in CDR3 ---------------------- |
| 910 | 932 |
| 911 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] | 933 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] |
| 935 AAfreqplot = AAfreqplot + annotate("rect", xmin = 0.5, xmax = 2.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2) | 957 AAfreqplot = AAfreqplot + annotate("rect", xmin = 0.5, xmax = 2.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2) |
| 936 AAfreqplot = AAfreqplot + annotate("rect", xmin = 3.5, xmax = 4.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2) | 958 AAfreqplot = AAfreqplot + annotate("rect", xmin = 3.5, xmax = 4.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2) |
| 937 AAfreqplot = AAfreqplot + annotate("rect", xmin = 5.5, xmax = 6.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2) | 959 AAfreqplot = AAfreqplot + annotate("rect", xmin = 5.5, xmax = 6.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2) |
| 938 AAfreqplot = AAfreqplot + annotate("rect", xmin = 6.5, xmax = 7.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2) | 960 AAfreqplot = AAfreqplot + annotate("rect", xmin = 6.5, xmax = 7.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2) |
| 939 AAfreqplot = AAfreqplot + ggtitle("Amino Acid Composition in the CDR3") + xlab("Amino Acid, from Hydrophilic (left) to Hydrophobic (right)") + ylab("Percentage") + scale_fill_manual(values=sample.colors) | 961 AAfreqplot = AAfreqplot + ggtitle("Amino Acid Composition in the CDR3") + xlab("Amino Acid, from Hydrophilic (left) to Hydrophobic (right)") + ylab("Percentage") + scale_fill_manual(values=sample.colors) |
| 940 AAfreqplot = AAfreqplot + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 962 AAfreqplot = AAfreqplot + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
| 941 | 963 |
| 942 png("AAComposition.png",width = 1280, height = 720) | 964 png("AAComposition.png",width = 1280, height = 720) |
| 943 AAfreqplot | 965 AAfreqplot |
| 944 dev.off() | 966 dev.off() |
| 967 | |
| 968 ggsave("AAComposition.pdf", AAfreqplot, width=12, height=7) | |
| 969 | |
| 945 write.table(AAfreq, "AAComposition.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) | 970 write.table(AAfreq, "AAComposition.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) |
| 946 | 971 |
| 947 # ---------------------- AA median CDR3 length ---------------------- | 972 # ---------------------- AA median CDR3 length ---------------------- |
| 948 | 973 |
| 949 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T), na.rm=T))), by=c("Sample")]) | 974 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T), na.rm=T))), by=c("Sample")]) |
