comparison report_clonality/RScript.r @ 44:1d8728f3ff37 draft

Uploaded
author davidvanzessen
date Fri, 08 Dec 2017 05:47:33 -0500
parents 2325074a8461
children 124b7fd92a3e
comparison
equal deleted inserted replaced
43:2325074a8461 44:1d8728f3ff37
439 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + 439 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) +
440 xlab("D genes") + 440 xlab("D genes") +
441 ylab("V Genes") + 441 ylab("V Genes") +
442 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) 442 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro"))
443 443
444 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) 444 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=200+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name)))
445 print(img) 445 print(img)
446 dev.off() 446 dev.off()
447 447
448 ggsave(paste("HeatmapVD_", unique(dat[3])[1,1] , ".pdf", sep=""), img, height=13, width=8) 448 ggsave(paste("HeatmapVD_", unique(dat[3])[1,1] , ".pdf", sep=""), img, height=13, width=8)
449 449
456 maxVD = data.frame(data.table(VandDCount)[, list(max=max(l)), by=c("Sample")]) 456 maxVD = data.frame(data.table(VandDCount)[, list(max=max(l)), by=c("Sample")])
457 VandDCount = merge(VandDCount, maxVD, by.x="Sample", by.y="Sample", all.x=T) 457 VandDCount = merge(VandDCount, maxVD, by.x="Sample", by.y="Sample", all.x=T)
458 VandDCount$relLength = VandDCount$l / VandDCount$max 458 VandDCount$relLength = VandDCount$l / VandDCount$max
459 check = is.nan(VandDCount$relLength) 459 check = is.nan(VandDCount$relLength)
460 if(any(check)){ 460 if(any(check)){
461 VandDCount[check,"relLength"] = 0 461 VandDCount[check,"relLength"] = 0
462 } 462 }
463 463
464 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name) 464 completeVD = merge(VandDCount, revVchain, by.x="Top.V.Gene", by.y="v.name", all=TRUE)
465 465 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all=TRUE)
466 completeVD = merge(VandDCount, cartegianProductVD, by.x=c("Top.V.Gene", "Top.D.Gene"), by.y=c("Top.V.Gene", "Top.D.Gene"), all=TRUE)
467
468 completeVD = merge(completeVD, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE)
469
470 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE)
471 466
472 fltr = is.nan(completeVD$relLength) 467 fltr = is.nan(completeVD$relLength)
473 if(all(fltr)){ 468 if(all(fltr)){
474 completeVD[fltr,"relLength"] = 0 469 completeVD[fltr,"relLength"] = 0
475 } 470 }
492 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + 487 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) +
493 xlab("J genes") + 488 xlab("J genes") +
494 ylab("V Genes") + 489 ylab("V Genes") +
495 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) 490 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro"))
496 491
497 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) 492 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=200+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name)))
498 print(img) 493 print(img)
499 dev.off() 494 dev.off()
500 495
501 ggsave(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".pdf", sep=""), img, height=11, width=4) 496 ggsave(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".pdf", sep=""), img, height=11, width=4)
502 497
503 write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) 498 write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA)
504 } 499 }
500
501
505 502
506 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) 503 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")])
507 504
508 VandJCount$l = log(VandJCount$Length) 505 VandJCount$l = log(VandJCount$Length)
509 maxVJ = data.frame(data.table(VandJCount)[, list(max=max(l)), by=c("Sample")]) 506 maxVJ = data.frame(data.table(VandJCount)[, list(max=max(l)), by=c("Sample")])
513 check = is.nan(VandJCount$relLength) 510 check = is.nan(VandJCount$relLength)
514 if(any(check)){ 511 if(any(check)){
515 VandJCount[check,"relLength"] = 0 512 VandJCount[check,"relLength"] = 0
516 } 513 }
517 514
518 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name) 515 completeVJ = merge(VandJCount, revVchain, by.x="Top.V.Gene", by.y="v.name", all=TRUE)
519 516 completeVJ = merge(completeVJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all=TRUE)
520 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE)
521 completeVJ = merge(completeVJ, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE)
522 completeVJ = merge(completeVJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE)
523 517
524 fltr = is.nan(completeVJ$relLength) 518 fltr = is.nan(completeVJ$relLength)
525 if(any(fltr)){ 519 if(any(fltr)){
526 completeVJ[fltr,"relLength"] = 1 520 completeVJ[fltr,"relLength"] = 1
527 } 521 }
544 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + 538 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) +
545 xlab("J genes") + 539 xlab("J genes") +
546 ylab("D Genes") + 540 ylab("D Genes") +
547 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) 541 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro"))
548 542
549 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) 543 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=200+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name)))
550 print(img) 544 print(img)
551 dev.off() 545 dev.off()
552 546
553 ggsave(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".pdf", sep=""), img, width=4, height=7) 547 ggsave(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".pdf", sep=""), img, width=4, height=7)
554 548
568 DandJCount[check,"relLength"] = 0 562 DandJCount[check,"relLength"] = 0
569 } 563 }
570 564
571 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name) 565 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name)
572 566
573 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) 567 completeDJ = merge(DandJCount, revDchain, by.x="Top.D.Gene", by.y="v.name", all=TRUE)
574 completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) 568 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all=TRUE)
575 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE)
576 569
577 fltr = is.nan(completeDJ$relLength) 570 fltr = is.nan(completeDJ$relLength)
578 if(any(fltr)){ 571 if(any(fltr)){
579 completeDJ[fltr, "relLength"] = 1 572 completeDJ[fltr, "relLength"] = 1
580 } 573 }
993 clonaltype.in.replicates = inputdata 986 clonaltype.in.replicates = inputdata
994 clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$Functionality %in% c("productive (see comment)","productive"),] 987 clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$Functionality %in% c("productive (see comment)","productive"),]
995 clonaltype.in.replicates = clonaltype.in.replicates[!(is.na(clonaltype.in.replicates$ID) | is.na(clonaltype.in.replicates$Top.V.Gene) | is.na(clonaltype.in.replicates$Top.J.Gene)),] 988 clonaltype.in.replicates = clonaltype.in.replicates[!(is.na(clonaltype.in.replicates$ID) | is.na(clonaltype.in.replicates$Top.V.Gene) | is.na(clonaltype.in.replicates$Top.J.Gene)),]
996 clonaltype = unlist(strsplit(clonaltype, ",")) 989 clonaltype = unlist(strsplit(clonaltype, ","))
997 990
998 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[paste_columns], sep = ":")) 991 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[clonaltype], sep = ":"))
999 992
1000 clonaltype.in.replicates = clonaltype.in.replicates[!duplicated(clonaltype.in.replicates$clonaltype),] 993 clonaltype.in.replicates = clonaltype.in.replicates[!duplicated(clonaltype.in.replicates$clonaltype),]
1001 994
1002 clonaltype = clonaltype[-which(clonaltype == "Sample")] 995 clonaltype = clonaltype[-which(clonaltype == "Sample")]
1003 996