Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 24:d5d203d38c8a draft
Uploaded
| author | davidvanzessen | 
|---|---|
| date | Wed, 01 Feb 2017 09:48:38 -0500 | 
| parents | 9185c3dfc679 | 
| children | 94765af0db1f | 
   comparison
  equal
  deleted
  inserted
  replaced
| 23:e2fbdfacec1d | 24:d5d203d38c8a | 
|---|---|
| 47 | 47 | 
| 48 print("Report Clonality - Data preperation") | 48 print("Report Clonality - Data preperation") | 
| 49 | 49 | 
| 50 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="", stringsAsFactors=F) | 50 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="", stringsAsFactors=F) | 
| 51 | 51 | 
| 52 | |
| 52 print(paste("nrows: ", nrow(inputdata))) | 53 print(paste("nrows: ", nrow(inputdata))) | 
| 53 | 54 | 
| 54 setwd(outdir) | 55 setwd(outdir) | 
| 55 | 56 | 
| 56 # remove weird rows | 57 # remove weird rows | 
| 119 clonalityFrame$clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(clonaltype, ","))], sep = ":")) | 120 clonalityFrame$clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(clonaltype, ","))], sep = ":")) | 
| 120 clonalityFrame$clonality_clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(paste(clonaltype, ",Replicate", sep=""), ","))], sep = ":")) | 121 clonalityFrame$clonality_clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(paste(clonaltype, ",Replicate", sep=""), ","))], sep = ":")) | 
| 121 clonalityFrame = clonalityFrame[!duplicated(clonalityFrame$clonality_clonaltype), ] | 122 clonalityFrame = clonalityFrame[!duplicated(clonalityFrame$clonality_clonaltype), ] | 
| 122 } | 123 } | 
| 123 | 124 | 
| 124 print("SAMPLE TABLE:") | |
| 125 print(table(PRODF$Sample)) | |
| 126 | |
| 127 prod.unique.sample.count = data.frame(data.table(PRODF)[, list(Productive_unique=.N), by=c("Sample")]) | 125 prod.unique.sample.count = data.frame(data.table(PRODF)[, list(Productive_unique=.N), by=c("Sample")]) | 
| 128 prod.unique.rep.count = data.frame(data.table(PRODF)[, list(Productive_unique=.N), by=c("Sample", "Replicate")]) | 126 prod.unique.rep.count = data.frame(data.table(PRODF)[, list(Productive_unique=.N), by=c("Sample", "Replicate")]) | 
| 129 | 127 | 
| 130 unprod.unique.sample.count = data.frame(data.table(UNPROD)[, list(Unproductive_unique=.N), by=c("Sample")]) | 128 unprod.unique.sample.count = data.frame(data.table(UNPROD)[, list(Unproductive_unique=.N), by=c("Sample")]) | 
| 131 unprod.unique.rep.count = data.frame(data.table(UNPROD)[, list(Unproductive_unique=.N), by=c("Sample", "Replicate")]) | 129 unprod.unique.rep.count = data.frame(data.table(UNPROD)[, list(Unproductive_unique=.N), by=c("Sample", "Replicate")]) | 
| 159 | 157 | 
| 160 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive | 158 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive | 
| 161 write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T) | 159 write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T) | 
| 162 #write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T) | 160 #write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T) | 
| 163 write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T) | 161 write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T) | 
| 162 | |
| 163 print("SAMPLE TABLE:") | |
| 164 print(table(PRODF$Sample)) | |
| 164 | 165 | 
| 165 #write the samples to a file | 166 #write the samples to a file | 
| 166 sampleFile <- file("samples.txt") | 167 sampleFile <- file("samples.txt") | 
| 167 un = unique(inputdata$Sample) | 168 un = unique(inputdata$Sample) | 
| 168 un = paste(un, sep="\n") | 169 un = paste(un, sep="\n") | 
| 381 scale_fill_manual(values=sample.colors) + | 382 scale_fill_manual(values=sample.colors) + | 
| 382 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 383 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 
| 383 png("CDR3LengthPlot.png",width = 1280, height = 720) | 384 png("CDR3LengthPlot.png",width = 1280, height = 720) | 
| 384 CDR3LengthPlot | 385 CDR3LengthPlot | 
| 385 dev.off() | 386 dev.off() | 
| 386 write.table(x=CDR3Length, file="CDR3LengthPlot.csv", sep=",",quote=F,row.names=F,col.names=T) | 387 write.table(x=CDR3Length, file="CDR3LengthPlot.txt", sep="\t",quote=F,row.names=F,col.names=T) | 
| 387 | 388 | 
| 388 # ---------------------- Plot the heatmaps ---------------------- | 389 # ---------------------- Plot the heatmaps ---------------------- | 
| 389 | 390 | 
| 390 #get the reverse order for the V and D genes | 391 #get the reverse order for the V and D genes | 
| 391 revVchain = Vchain | 392 revVchain = Vchain | 
| 410 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) | 411 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) | 
| 411 | 412 | 
| 412 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) | 413 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) | 
| 413 print(img) | 414 print(img) | 
| 414 dev.off() | 415 dev.off() | 
| 415 write.table(x=acast(dat, Top.V.Gene~Top.D.Gene, value.var="Length"), file=paste("HeatmapVD_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) | 416 write.table(x=acast(dat, Top.V.Gene~Top.D.Gene, value.var="Length"), file=paste("HeatmapVD_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) | 
| 416 } | 417 } | 
| 417 | 418 | 
| 418 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) | 419 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) | 
| 419 | 420 | 
| 420 VandDCount$l = log(VandDCount$Length) | 421 VandDCount$l = log(VandDCount$Length) | 
| 460 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) | 461 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) | 
| 461 | 462 | 
| 462 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) | 463 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) | 
| 463 print(img) | 464 print(img) | 
| 464 dev.off() | 465 dev.off() | 
| 465 write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) | 466 write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) | 
| 466 } | 467 } | 
| 467 | 468 | 
| 468 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) | 469 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) | 
| 469 | 470 | 
| 470 VandJCount$l = log(VandJCount$Length) | 471 VandJCount$l = log(VandJCount$Length) | 
| 509 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) | 510 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), panel.grid.major = element_line(colour = "gainsboro")) | 
| 510 | 511 | 
| 511 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) | 512 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) | 
| 512 print(img) | 513 print(img) | 
| 513 dev.off() | 514 dev.off() | 
| 514 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) | 515 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".txt", sep=""), sep="\t",quote=F,row.names=T,col.names=NA) | 
| 515 } | 516 } | 
| 516 | 517 | 
| 517 | 518 | 
| 518 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) | 519 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) | 
| 519 | 520 | 
| 699 | 700 | 
| 700 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") | 701 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") | 
| 701 if(all(imgtcolumns %in% colnames(inputdata))) | 702 if(all(imgtcolumns %in% colnames(inputdata))) | 
| 702 { | 703 { | 
| 703 print("found IMGT columns, running junction analysis") | 704 print("found IMGT columns, running junction analysis") | 
| 704 | 705 | 
| 705 if(locus %in% c("IGK","IGL", "TRA", "TRG")){ | |
| 706 print("VJ recombination, no filtering on absent D") | |
| 707 } else { | |
| 708 print("VDJ recombination, using N column for junction analysis") | |
| 709 fltr = nchar(PRODF$Top.D.Gene) < 4 | |
| 710 print(paste("Removing", sum(fltr), "sequences without a identified D")) | |
| 711 PRODF = PRODF[!fltr,] | |
| 712 } | |
| 713 | |
| 714 | |
| 715 #ensure certain columns are in the data (files generated with older versions of IMGT Loader) | 706 #ensure certain columns are in the data (files generated with older versions of IMGT Loader) | 
| 716 col.checks = c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb") | 707 col.checks = c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb") | 
| 717 for(col.check in col.checks){ | 708 for(col.check in col.checks){ | 
| 718 if(!(col.check %in% names(PRODF))){ | 709 if(!(col.check %in% names(PRODF))){ | 
| 719 print(paste(col.check, "not found adding new column")) | 710 print(paste(col.check, "not found adding new column")) | 
| 728 UNPROD = cbind(UNPROD, data.frame(N3.REGION.nt.nb=numeric(0), N4.REGION.nt.nb=numeric(0))) | 719 UNPROD = cbind(UNPROD, data.frame(N3.REGION.nt.nb=numeric(0), N4.REGION.nt.nb=numeric(0))) | 
| 729 } | 720 } | 
| 730 } | 721 } | 
| 731 } | 722 } | 
| 732 | 723 | 
| 724 PRODF.with.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) > 2,] | |
| 725 PRODF.no.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) < 4,] | |
| 726 | |
| 733 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) } | 727 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) } | 
| 734 | 728 | 
| 735 newData = data.frame(data.table(PRODF)[,list(unique=.N, | 729 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, | 
| 736 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 730 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 
| 737 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 731 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 
| 738 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 732 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 
| 739 P2=mean(.SD$P5D.nt.nb, na.rm=T), | 733 P2=mean(.SD$P5D.nt.nb, na.rm=T), | 
| 740 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 734 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 
| 747 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 741 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 
| 748 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 742 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 
| 749 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 743 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 
| 750 by=c("Sample")]) | 744 by=c("Sample")]) | 
| 751 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 745 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 
| 752 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 746 write.table(newData, "junctionAnalysisProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 
| 753 | 747 | 
| 754 newData = data.frame(data.table(PRODF)[,list(unique=.N, | 748 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, | 
| 755 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 749 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 
| 756 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 750 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 
| 757 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 751 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 
| 758 P2=num_median(.SD$P5D.nt.nb, na.rm=T), | 752 P2=num_median(.SD$P5D.nt.nb, na.rm=T), | 
| 759 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 753 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 
| 766 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 760 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 
| 767 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 761 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 
| 768 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 762 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 
| 769 by=c("Sample")]) | 763 by=c("Sample")]) | 
| 770 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 764 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 
| 771 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 765 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 
| 772 | 766 | 
| 773 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 767 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, | 
| 774 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 768 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 
| 775 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 769 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 
| 776 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 770 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 
| 777 P2=mean(.SD$P5D.nt.nb, na.rm=T), | 771 P2=mean(.SD$P5D.nt.nb, na.rm=T), | 
| 778 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 772 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 
| 785 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 779 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 
| 786 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 780 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 
| 787 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 781 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 
| 788 by=c("Sample")]) | 782 by=c("Sample")]) | 
| 789 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 783 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 
| 790 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 784 write.table(newData, "junctionAnalysisUnProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 
| 791 | 785 | 
| 792 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 786 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, | 
| 793 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 787 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 
| 794 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 788 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 
| 795 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 789 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 
| 796 P2=num_median(.SD$P5D.nt.nb, na.rm=T), | 790 P2=num_median(.SD$P5D.nt.nb, na.rm=T), | 
| 797 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 791 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 
| 803 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 797 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 
| 804 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 798 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 
| 805 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 799 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 
| 806 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 800 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 
| 807 by=c("Sample")]) | 801 by=c("Sample")]) | 
| 808 | |
| 809 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 802 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 
| 810 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 803 write.table(newData, "junctionAnalysisUnProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 
| 804 | |
| 805 #---------------- again for no-D | |
| 806 | |
| 807 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, | |
| 808 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | |
| 809 P1=mean(.SD$P3V.nt.nb, na.rm=T), | |
| 810 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | |
| 811 P2=mean(.SD$P5J.nt.nb, na.rm=T), | |
| 812 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | |
| 813 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | |
| 814 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | |
| 815 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | |
| 816 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | |
| 817 by=c("Sample")]) | |
| 818 print(newData) | |
| 819 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | |
| 820 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | |
| 821 | |
| 822 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, | |
| 823 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | |
| 824 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | |
| 825 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | |
| 826 P2=num_median(.SD$P5J.nt.nb, na.rm=T), | |
| 827 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | |
| 828 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | |
| 829 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | |
| 830 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | |
| 831 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | |
| 832 by=c("Sample")]) | |
| 833 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | |
| 834 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | |
| 835 | |
| 836 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, | |
| 837 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | |
| 838 P1=mean(.SD$P3V.nt.nb, na.rm=T), | |
| 839 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | |
| 840 P2=mean(.SD$P5J.nt.nb, na.rm=T), | |
| 841 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | |
| 842 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | |
| 843 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | |
| 844 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | |
| 845 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | |
| 846 by=c("Sample")]) | |
| 847 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | |
| 848 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | |
| 849 | |
| 850 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, | |
| 851 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | |
| 852 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | |
| 853 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | |
| 854 P2=num_median(.SD$P5J.nt.nb, na.rm=T), | |
| 855 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | |
| 856 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | |
| 857 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | |
| 858 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | |
| 859 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | |
| 860 by=c("Sample")]) | |
| 861 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | |
| 862 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | |
| 811 } | 863 } | 
| 812 | 864 | 
| 813 PRODF = bak | 865 PRODF = bak | 
| 814 | 866 | 
| 815 | 867 | 
| 820 chck = is.na(D.REGION.reading.frame$D.REGION.reading.frame) | 872 chck = is.na(D.REGION.reading.frame$D.REGION.reading.frame) | 
| 821 if(any(chck)){ | 873 if(any(chck)){ | 
| 822 D.REGION.reading.frame[chck,"D.REGION.reading.frame"] = "No D" | 874 D.REGION.reading.frame[chck,"D.REGION.reading.frame"] = "No D" | 
| 823 } | 875 } | 
| 824 | 876 | 
| 825 D.REGION.reading.frame = data.frame(data.table(D.REGION.reading.frame)[, list(Freq=.N), by=c("Sample", "D.REGION.reading.frame")]) | 877 D.REGION.reading.frame.1 = data.frame(data.table(D.REGION.reading.frame)[, list(Freq=.N), by=c("Sample", "D.REGION.reading.frame")]) | 
| 826 | 878 | 
| 827 write.table(D.REGION.reading.frame, "DReadingFrame.csv" , sep="\t",quote=F,row.names=F,col.names=T) | 879 D.REGION.reading.frame.2 = data.frame(data.table(D.REGION.reading.frame)[, list(sample.sum=sum(as.numeric(.SD$D.REGION.reading.frame), na.rm=T)), by=c("Sample")]) | 
| 880 | |
| 881 D.REGION.reading.frame = merge(D.REGION.reading.frame.1, D.REGION.reading.frame.2, by="Sample") | |
| 882 | |
| 883 D.REGION.reading.frame$percentage = round(D.REGION.reading.frame$Freq / D.REGION.reading.frame$sample.sum * 100, 1) | |
| 884 | |
| 885 write.table(D.REGION.reading.frame, "DReadingFrame.txt" , sep="\t",quote=F,row.names=F,col.names=T) | |
| 828 | 886 | 
| 829 D.REGION.reading.frame = ggplot(D.REGION.reading.frame) | 887 D.REGION.reading.frame = ggplot(D.REGION.reading.frame) | 
| 830 D.REGION.reading.frame = D.REGION.reading.frame + geom_bar(aes( x = D.REGION.reading.frame, y = Freq, fill=Sample), stat='identity', position='dodge' ) + ggtitle("D reading frame") + xlab("Frequency") + ylab("Frame") | 888 D.REGION.reading.frame = D.REGION.reading.frame + geom_bar(aes( x = D.REGION.reading.frame, y = percentage, fill=Sample), stat='identity', position='dodge' ) + ggtitle("D reading frame") + xlab("Frequency") + ylab("Frame") | 
| 831 D.REGION.reading.frame = D.REGION.reading.frame + scale_fill_manual(values=sample.colors) | 889 D.REGION.reading.frame = D.REGION.reading.frame + scale_fill_manual(values=sample.colors) | 
| 832 D.REGION.reading.frame = D.REGION.reading.frame + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 890 D.REGION.reading.frame = D.REGION.reading.frame + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 
| 833 | 891 | 
| 834 png("DReadingFrame.png") | 892 png("DReadingFrame.png") | 
| 835 D.REGION.reading.frame | 893 D.REGION.reading.frame | 
| 876 dev.off() | 934 dev.off() | 
| 877 write.table(AAfreq, "AAComposition.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) | 935 write.table(AAfreq, "AAComposition.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) | 
| 878 | 936 | 
| 879 # ---------------------- AA median CDR3 length ---------------------- | 937 # ---------------------- AA median CDR3 length ---------------------- | 
| 880 | 938 | 
| 881 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(.SD$CDR3.Length))), by=c("Sample")]) | 939 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T), na.rm=T))), by=c("Sample")]) | 
| 882 write.table(median.aa.l, "AAMedianBySample.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 940 write.table(median.aa.l, "AAMedianBySample.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 
| 883 | 941 | 
