Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 9:efa1f5a17b6e draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Mon, 19 Dec 2016 09:23:01 -0500 |
| parents | 8cbc1a8d27ae |
| children | d3ebaa2d2fe0 |
comparison
equal
deleted
inserted
replaced
| 8:8cbc1a8d27ae | 9:efa1f5a17b6e |
|---|---|
| 361 | 361 |
| 362 # ---------------------- Plotting the cdr3 length ---------------------- | 362 # ---------------------- Plotting the cdr3 length ---------------------- |
| 363 | 363 |
| 364 print("Report Clonality - CDR3 length plot") | 364 print("Report Clonality - CDR3 length plot") |
| 365 | 365 |
| 366 CDR3Length = data.frame(data.table(PRODF)[, list(Count=.N), by=c("Sample", "CDR3.Length.DNA")]) | 366 CDR3Length = data.frame(data.table(PRODF)[, list(Count=.N), by=c("Sample", "CDR3.Length")]) |
| 367 TotalPerSample = data.frame(data.table(CDR3Length)[, list(total=sum(.SD$Count)), by=Sample]) | 367 TotalPerSample = data.frame(data.table(CDR3Length)[, list(total=sum(.SD$Count)), by=Sample]) |
| 368 CDR3Length = merge(CDR3Length, TotalPerSample, by="Sample") | 368 CDR3Length = merge(CDR3Length, TotalPerSample, by="Sample") |
| 369 CDR3Length$Frequency = CDR3Length$Count * 100 / CDR3Length$total | 369 CDR3Length$Frequency = CDR3Length$Count * 100 / CDR3Length$total |
| 370 CDR3LengthPlot = ggplot(CDR3Length) | 370 CDR3LengthPlot = ggplot(CDR3Length) |
| 371 CDR3LengthPlot = CDR3LengthPlot + geom_bar(aes( x = CDR3.Length.DNA, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 371 CDR3LengthPlot = CDR3LengthPlot + geom_bar(aes( x = CDR3.Length, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
| 372 ggtitle("Length distribution of CDR3") + | 372 ggtitle("Length distribution of CDR3") + |
| 373 xlab("CDR3 Length") + | 373 xlab("CDR3 Length") + |
| 374 ylab("Percentage of sequences") + | 374 ylab("Percentage of sequences") + |
| 375 scale_fill_manual(values=sample.colors) + | 375 scale_fill_manual(values=sample.colors) + |
| 376 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 376 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
| 398 geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=relLength)) + | 398 geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=relLength)) + |
| 399 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 399 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
| 400 scale_fill_gradient(low="gold", high="blue", na.value="white") + | 400 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
| 401 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + | 401 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + |
| 402 xlab("D genes") + | 402 xlab("D genes") + |
| 403 ylab("V Genes") | 403 ylab("V Genes") + |
| 404 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major = element_line(colour = "gainsboro")) | |
| 404 | 405 |
| 405 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) | 406 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) |
| 406 print(img) | 407 print(img) |
| 407 dev.off() | 408 dev.off() |
| 408 write.table(x=acast(dat, Top.V.Gene~Top.D.Gene, value.var="Length"), file=paste("HeatmapVD_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) | 409 write.table(x=acast(dat, Top.V.Gene~Top.D.Gene, value.var="Length"), file=paste("HeatmapVD_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) |
| 447 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=relLength)) + | 448 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=relLength)) + |
| 448 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 449 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
| 449 scale_fill_gradient(low="gold", high="blue", na.value="white") + | 450 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
| 450 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + | 451 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + |
| 451 xlab("J genes") + | 452 xlab("J genes") + |
| 452 ylab("V Genes") | 453 ylab("V Genes") + |
| 454 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major = element_line(colour = "gainsboro")) | |
| 453 | 455 |
| 454 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) | 456 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) |
| 455 print(img) | 457 print(img) |
| 456 dev.off() | 458 dev.off() |
| 457 write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) | 459 write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) |
| 495 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=relLength)) + | 497 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=relLength)) + |
| 496 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 498 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
| 497 scale_fill_gradient(low="gold", high="blue", na.value="white") + | 499 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
| 498 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + | 500 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + |
| 499 xlab("J genes") + | 501 xlab("J genes") + |
| 500 ylab("D Genes") | 502 ylab("D Genes") + |
| 503 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major = element_line(colour = "gainsboro")) | |
| 501 | 504 |
| 502 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) | 505 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) |
| 503 print(img) | 506 print(img) |
| 504 dev.off() | 507 dev.off() |
| 505 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) | 508 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) |
| 729 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 732 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
| 730 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 733 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 731 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 734 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 732 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 735 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
| 733 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 736 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 734 Median.CDR3.l=median(.SD$CDR3.Length.DNA)), | 737 Median.CDR3.l=median(.SD$CDR3.Length)), |
| 735 by=c("Sample")]) | 738 by=c("Sample")]) |
| 736 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 739 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 737 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 740 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
| 738 | 741 |
| 739 newData = data.frame(data.table(PRODF)[,list(unique=.N, | 742 newData = data.frame(data.table(PRODF)[,list(unique=.N, |
| 748 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 751 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
| 749 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 752 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 750 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 753 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 751 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 754 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
| 752 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 755 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 753 Median.CDR3.l=median(.SD$CDR3.Length.DNA)), | 756 Median.CDR3.l=median(.SD$CDR3.Length)), |
| 754 by=c("Sample")]) | 757 by=c("Sample")]) |
| 755 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 758 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 756 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 759 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
| 757 | 760 |
| 758 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 761 newData = data.frame(data.table(UNPROD)[,list(unique=.N, |
| 767 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 770 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
| 768 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 771 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 769 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 772 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 770 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 773 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
| 771 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 774 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 772 Median.CDR3.l=median(.SD$CDR3.Length.DNA)), | 775 Median.CDR3.l=median(.SD$CDR3.Length)), |
| 773 by=c("Sample")]) | 776 by=c("Sample")]) |
| 774 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 777 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 775 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 778 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
| 776 | 779 |
| 777 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 780 newData = data.frame(data.table(UNPROD)[,list(unique=.N, |
| 786 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 789 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
| 787 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 790 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 788 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 791 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 789 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 792 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
| 790 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 793 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 791 Median.CDR3.l=median(.SD$CDR3.Length.DNA)), | 794 Median.CDR3.l=median(.SD$CDR3.Length)), |
| 792 by=c("Sample")]) | 795 by=c("Sample")]) |
| 793 | 796 |
| 794 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 797 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 795 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 798 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
| 796 } | 799 } |
| 861 dev.off() | 864 dev.off() |
| 862 write.table(AAfreq, "AAComposition.csv" , sep=",",quote=F,na="-",row.names=F,col.names=T) | 865 write.table(AAfreq, "AAComposition.csv" , sep=",",quote=F,na="-",row.names=F,col.names=T) |
| 863 | 866 |
| 864 # ---------------------- AA median CDR3 length ---------------------- | 867 # ---------------------- AA median CDR3 length ---------------------- |
| 865 | 868 |
| 866 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(.SD$CDR3.Length.DNA))), by=c("Sample")]) | 869 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(.SD$CDR3.Length))), by=c("Sample")]) |
| 867 write.table(median.aa.l, "AAMedianBySample.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 870 write.table(median.aa.l, "AAMedianBySample.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
| 868 | 871 |
