comparison report_clonality/RScript.r @ 26:28fbbdfd7a87 draft
| description | Uploaded |
|---|---|
| author | davidvanzessen |
| date | Mon, 13 Feb 2017 09:08:46 -0500 |
| parents | 94765af0db1f |
| children | 1f83e14f173b |
| 25:94765af0db1f | 26:28fbbdfd7a87 |
|---|---|
| 612 | 612 |
| 613 coincidence.table = data.frame(table(res$type)) | 613 coincidence.table = data.frame(table(res$type)) |
| 614 colnames(coincidence.table) = c("Coincidence Type", "Raw Coincidence Freq") | 614 colnames(coincidence.table) = c("Coincidence Type", "Raw Coincidence Freq") |
| 615 write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 615 write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
| 616 } | 616 } |
| 617 } else if(clonality_method == "old") { | 617 } |
| 618 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")]) | 618 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")]) |
| 619 | 619 |
| 620 #write files for every coincidence group of >1 | 620 #write files for every coincidence group of >1 |
| 621 samples = unique(clonalFreq$Sample) | 621 samples = unique(clonalFreq$Sample) |
| 622 for(sample in samples){ | 622 for(sample in samples){ |
| 623 clonalFreqSample = clonalFreq[clonalFreq$Sample == sample,] | 623 clonalFreqSample = clonalFreq[clonalFreq$Sample == sample,] |
| 624 if(max(clonalFreqSample$Type) > 1){ | 624 if(max(clonalFreqSample$Type) > 1){ |
| 625 for(i in 2:max(clonalFreqSample$Type)){ | 625 for(i in 2:max(clonalFreqSample$Type)){ |
| 626 clonalFreqSampleType = clonalFreqSample[clonalFreqSample$Type == i,] | 626 clonalFreqSampleType = clonalFreqSample[clonalFreqSample$Type == i,] |
| 627 clonalityFrame.sub = clonalityFrame[clonalityFrame$clonaltype %in% clonalFreqSampleType$clonaltype,] | 627 clonalityFrame.sub = clonalityFrame[clonalityFrame$clonaltype %in% clonalFreqSampleType$clonaltype,] |
| 628 clonalityFrame.sub = clonalityFrame.sub[order(clonalityFrame.sub$clonaltype),] | 628 clonalityFrame.sub = clonalityFrame.sub[order(clonalityFrame.sub$clonaltype),] |
| 629 write.table(clonalityFrame.sub, file=paste("coincidences_", sample, "_", i, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 629 write.table(clonalityFrame.sub, file=paste("coincidences_", sample, "_", i, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
| 630 } | 630 } |
| 631 } | 631 } |
| 632 } | 632 } |
| 633 | 633 |
| 634 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) | 634 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) |
| 635 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count | 635 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count |
| 636 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) | 636 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) |
| 637 clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample") | 637 clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample") |
| 638 | 638 |
| 639 ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15') | 639 ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15') |
| 640 tcct = textConnection(ct) | 640 tcct = textConnection(ct) |
| 641 CT = read.table(tcct, sep="\t", header=TRUE) | 641 CT = read.table(tcct, sep="\t", header=TRUE) |
| 642 close(tcct) | 642 close(tcct) |
| 643 clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T) | 643 clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T) |
| 644 clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight | 644 clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight |
| 645 | 645 |
| 646 ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonaltype")]) | 646 ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonaltype")]) |
| 647 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")]) | 647 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")]) |
| 648 clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads) | 648 clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads) |
| 649 ReplicateReads$Reads = as.numeric(ReplicateReads$Reads) | 649 ReplicateReads$Reads = as.numeric(ReplicateReads$Reads) |
| 650 ReplicateReads$squared = as.numeric(ReplicateReads$Reads * ReplicateReads$Reads) | 650 ReplicateReads$squared = as.numeric(ReplicateReads$Reads * ReplicateReads$Reads) |
| 651 | 651 |
| 652 ReplicatePrint <- function(dat){ | 652 ReplicatePrint <- function(dat){ |
| 653 write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | 653 write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".txt", sep=""), sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 654 } | 654 } |
| 655 | 655 |
| 656 ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) | 656 ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) |
| 657 lapply(ReplicateSplit, FUN=ReplicatePrint) | 657 lapply(ReplicateSplit, FUN=ReplicatePrint) |
| 658 | 658 |
| 659 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")]) | 659 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")]) |
| 660 clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T) | 660 clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T) |
| 661 | 661 |
| 662 ReplicateSumPrint <- function(dat){ | 662 ReplicateSumPrint <- function(dat){ |
| 663 write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | 663 write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".txt", sep=""), sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 664 } | 664 } |
| 665 | 665 |
| 666 ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) | 666 ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) |
| 667 lapply(ReplicateSumSplit, FUN=ReplicateSumPrint) | 667 lapply(ReplicateSumSplit, FUN=ReplicateSumPrint) |
| 668 | 668 |
| 669 clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")]) | 669 clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")]) |
| 670 clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T) | 670 clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T) |
| 671 clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow | 671 clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow |
| 672 clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2) | 672 clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2) |
| 673 clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1) | 673 clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1) |
| 674 | 674 |
| 675 ClonalityScorePrint <- function(dat){ | 675 ClonalityScorePrint <- function(dat){ |
| 676 write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | 676 write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".txt", sep=""), sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 677 } | 677 } |
| 678 | 678 |
| 679 clonalityScore = clonalFreqCount[c("Sample", "Result")] | 679 clonalityScore = clonalFreqCount[c("Sample", "Result")] |
| 680 clonalityScore = unique(clonalityScore) | 680 clonalityScore = unique(clonalityScore) |
| 681 | 681 |
| 682 clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"]) | 682 clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"]) |
| 683 lapply(clonalityScoreSplit, FUN=ClonalityScorePrint) | 683 lapply(clonalityScoreSplit, FUN=ClonalityScorePrint) |
| 684 | 684 |
| 685 clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")] | 685 clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")] |
| 686 | 686 |
| 687 | 687 |
| 688 | 688 |
| 689 ClonalityOverviewPrint <- function(dat){ | 689 ClonalityOverviewPrint <- function(dat){ |
| 690 dat = dat[order(dat[,2]),] | 690 dat = dat[order(dat[,2]),] |
| 691 write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | 691 write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".txt", sep=""), sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 692 } | 692 } |
| 693 | 693 |
| 694 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) | 694 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) |
| 695 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) | 695 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) |
| 696 } | 696 |
| 697 } | 697 } |
| 698 | 698 |
| 699 bak = PRODF | 699 bak = PRODF |
| 700 bakun = UNPROD | 700 bakun = UNPROD |
| 701 | 701 |
| 722 } | 722 } |
| 723 } | 723 } |
| 724 | 724 |
| 725 PRODF.with.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) > 2,] | 725 PRODF.with.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) > 2,] |
| 726 PRODF.no.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) < 4,] | 726 PRODF.no.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) < 4,] |
| | 727 write.table(PRODF.no.D, "productive_no_D.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) |
| 727 | 728 |
| 728 UNPROD.with.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) > 2,] | 729 UNPROD.with.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) > 2,] |
| 729 UNPROD.no.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) < 4,] | 730 UNPROD.no.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) < 4,] |
| | 731 write.table(UNPROD.no.D, "unproductive_no_D.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) |
| 730 | 732 |
| 731 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) } | 733 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) } |
| 732 | |
| 733 print("---- table prod.with.d cdr3.length ----") | |
| 734 print(table(PRODF.with.D$CDR3.Length, useNA="ifany")) | |
| 735 print(median(PRODF.with.D$CDR3.Length, na.rm=T)) | |
| 736 | 734 |
| 737 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, | 735 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, |
| 738 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 736 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 739 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 737 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
| 740 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 738 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), |
| 770 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))), | 768 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))), |
| 771 by=c("Sample")]) | 769 by=c("Sample")]) |
| 772 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 770 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 773 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 771 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 774 | 772 |
| 775 print("---- table unprod.with.d cdr3.length ----") | |
| 776 print(table(UNPROD.with.D$CDR3.Length, useNA="ifany")) | |
| 777 print(median(UNPROD.with.D$CDR3.Length, na.rm=T)) | |
| 778 | |
| 779 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, | 773 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, |
| 780 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 774 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 781 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 775 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
| 782 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 776 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), |
| 783 P2=mean(.SD$P5D.nt.nb, na.rm=T), | 777 P2=mean(.SD$P5D.nt.nb, na.rm=T), |
| 817 #---------------- again for no-D | 811 #---------------- again for no-D |
| 818 | 812 |
| 819 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, | 813 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, |
| 820 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 814 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 821 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 815 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
| 822 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 816 N1=mean(.SD$N.REGION.nt.nb, na.rm=T), |
| 823 P2=mean(.SD$P5J.nt.nb, na.rm=T), | 817 P2=mean(.SD$P5J.nt.nb, na.rm=T), |
| 824 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 818 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 825 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 819 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 826 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 820 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), |
| 827 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 821 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 828 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 822 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), |
| 829 by=c("Sample")]) | 823 by=c("Sample")]) |
| 830 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 824 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 831 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 825 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 832 | 826 |
| 833 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, | 827 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, |
| 834 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 828 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 835 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 829 P1=num_median(.SD$P3V.nt.nb, na.rm=T), |
| 836 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 830 N1=median(.SD$N.REGION.nt.nb, na.rm=T), |
| 837 P2=num_median(.SD$P5J.nt.nb, na.rm=T), | 831 P2=num_median(.SD$P5J.nt.nb, na.rm=T), |
| 838 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 832 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 839 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 833 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 840 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 834 Total.N=median(.SD$N.REGION.nt.nb, na.rm=T), |
| 841 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 835 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 842 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 836 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), |
| 843 by=c("Sample")]) | 837 by=c("Sample")]) |
| 844 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 838 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 845 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 839 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 846 | 840 |
| | 841 print(paste("mean N:", mean(UNPROD.no.D$N.REGION.nt.nb, na.rm=T))) |
| | 842 |
| 847 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, | 843 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, |
| 848 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 844 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 849 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 845 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
| 850 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 846 N1=mean(.SD$N.REGION.nt.nb, na.rm=T), |
| 851 P2=mean(.SD$P5J.nt.nb, na.rm=T), | 847 P2=mean(.SD$P5J.nt.nb, na.rm=T), |
| 852 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 848 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 853 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 849 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 854 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 850 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), |
| 855 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 851 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 856 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 852 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), |
| 857 by=c("Sample")]) | 853 by=c("Sample")]) |
| 858 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 854 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 859 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 855 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 860 | 856 |
| | 857 print(paste("median N:", num_median(UNPROD.no.D$N.REGION.nt.nb, na.rm=T))) |
| | 858 |
| 861 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, | 859 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, |
| 862 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 860 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 863 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 861 P1=num_median(.SD$P3V.nt.nb, na.rm=T), |
| 864 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 862 N1=median(.SD$N.REGION.nt.nb, na.rm=T), |
| 865 P2=num_median(.SD$P5J.nt.nb, na.rm=T), | 863 P2=num_median(.SD$P5J.nt.nb, na.rm=T), |
| 866 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 864 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 867 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 865 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 868 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 866 Total.N=median(.SD$N.REGION.nt.nb, na.rm=T), |
| 869 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 867 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 870 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 868 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), |
| 871 by=c("Sample")]) | 869 by=c("Sample")]) |
| 872 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 870 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 873 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 871 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 958 clonaltype = clonaltype[-which(clonaltype == "Sample")] | 956 clonaltype = clonaltype[-which(clonaltype == "Sample")] |
| 959 | 957 |
| 960 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[clonaltype], sep = ":")) | 958 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[clonaltype], sep = ":")) |
| 961 clonaltype.in.replicates = clonaltype.in.replicates[,c("clonaltype","Replicate", "ID", "Sequence", "Sample")] | 959 clonaltype.in.replicates = clonaltype.in.replicates[,c("clonaltype","Replicate", "ID", "Sequence", "Sample")] |
| 962 | 960 |
| 963 print(head(clonaltype.in.replicates)) | |
| 964 | |
| 965 clonaltype.counts = data.frame(table(clonaltype.in.replicates$clonaltype)) | 961 clonaltype.counts = data.frame(table(clonaltype.in.replicates$clonaltype)) |
| 966 names(clonaltype.counts) = c("clonaltype", "coincidence") | 962 names(clonaltype.counts) = c("clonaltype", "coincidence") |
| 967 | 963 |
| 968 print(head(clonaltype.counts)) | |
| 969 | |
| 970 clonaltype.counts = clonaltype.counts[clonaltype.counts$coincidence > 1,] | 964 clonaltype.counts = clonaltype.counts[clonaltype.counts$coincidence > 1,] |
| 971 | |
| 972 head(clonaltype.counts) | |
| 973 | 965 |
| 974 clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$clonaltype %in% clonaltype.counts$clonaltype,] | 966 clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$clonaltype %in% clonaltype.counts$clonaltype,] |
| 975 clonaltype.in.replicates = merge(clonaltype.in.replicates, clonaltype.counts, by="clonaltype") | 967 clonaltype.in.replicates = merge(clonaltype.in.replicates, clonaltype.counts, by="clonaltype") |
| 976 print(head(clonaltype.in.replicates)) | |
| 977 clonaltype.in.replicates = clonaltype.in.replicates[order(clonaltype.in.replicates$clonaltype),c("coincidence","clonaltype", "Sample", "Replicate", "ID", "Sequence")] | 968 clonaltype.in.replicates = clonaltype.in.replicates[order(clonaltype.in.replicates$clonaltype),c("coincidence","clonaltype", "Sample", "Replicate", "ID", "Sequence")] |
| 978 | 969 |
| 979 write.table(clonaltype.in.replicates, "clonaltypes_replicates.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) | 970 write.table(clonaltype.in.replicates, "clonaltypes_replicates.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) |
| 980 | 971 |
| 981 | 972 |
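
For reference, the clonality computation shared by both revisions in the first hunk (script lines 618 onward; revision 26 appears to drop the surrounding `else if(clonality_method == "old")` guard) reduces to a weighted coincidence score: a clonal type observed in k replicates contributes choose(k, 2) coinciding read pairs (the Type/Weight table maps 2, 3, 4, 5, 6 to 1, 3, 6, 10, 15), and the score divides that weighted sum by the number of possible cross-replicate read pairs. A minimal sketch with made-up counts, not taken from the tool:

```r
# Hypothetical inputs: unique reads per replicate and the number of clonal
# types seen in exactly k replicates (k = 2..6). All numbers are invented.
reads.per.replicate <- c(R1 = 1200, R2 = 1100, R3 = 1300)
coincidences <- c(`2` = 40, `3` = 5)

weights <- c(`2` = 1, `3` = 3, `4` = 6, `5` = 10, `6` = 15)   # choose(k, 2)
numerator <- sum(coincidences * weights[names(coincidences)])

reads.sum <- sum(reads.per.replicate)
reads.squared.sum <- sum(reads.per.replicate^2)
denominator <- (reads.sum^2 - reads.squared.sum) / 2          # cross-replicate pairs

clonality.score <- (numerator + 1) / (denominator + 1)
print(clonality.score)
```

The +1 on both sides of the ratio keeps the score defined when no coincidences are found at all.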
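
Several writers in that hunk (ReplicatePrint, ReplicateSumPrint, ClonalityScorePrint, ClonalityOverviewPrint) share one pattern: split a data frame by Sample, drop the Sample column, and write one file per sample; revision 26 switches their output from comma-separated .csv to tab-separated .txt. A self-contained sketch of that pattern, with an invented frame and file prefix:

```r
# Toy frame standing in for the real per-sample tables.
toy <- data.frame(Sample = c("S1", "S1", "S2"),
                  Type   = c(2, 3, 2),
                  Count  = c(10, 2, 7))

write.per.sample <- function(dat) {
  # The first column holds the sample name: keep it in the file name and
  # drop it from the written body, as ReplicatePrint and friends do.
  write.table(dat[-1], paste("Example_", unique(dat[1])[1, 1], ".txt", sep = ""),
              sep = "\t", quote = FALSE, na = "-", row.names = FALSE, col.names = FALSE)
}

lapply(split(toy, f = toy$Sample), FUN = write.per.sample)
```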
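
The junctionAnalysis* tables further down are all built with the same data.table idiom: group the productive/unproductive frame by Sample, compute per-group summaries over .SD columns (means for the *_mean_* files, num_median for the *_median_* files), then round the numeric columns to one decimal. A reduced sketch; the column names del.v and del.j are invented stand-ins for the IMGT-derived columns in PRODF/UNPROD:

```r
library(data.table)

# Invented per-sequence deletion counts, one row per rearrangement.
toy <- data.frame(Sample = c("S1", "S1", "S2", "S2"),
                  del.v  = c(3, NA, 1, 2),
                  del.j  = c(5, 4, 2, 2))

summary.frame <- data.frame(data.table(toy)[, list(
    unique    = .N,
    V.del     = mean(.SD$del.v, na.rm = TRUE),
    Total.del = mean(rowSums(.SD[, c("del.v", "del.j"), with = FALSE], na.rm = TRUE))
  ), by = c("Sample")])

summary.frame[, sapply(summary.frame, is.numeric)] = round(summary.frame[, sapply(summary.frame, is.numeric)], 1)
print(summary.frame)
```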
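
One behavioural detail in the no-D aggregations: revision 26 replaces `mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T))` with `mean(.SD$N.REGION.nt.nb, na.rm=T)` for N1 and Total.N (and the median equivalents), and adds print() checks of the mean and median N. With a single column the two forms differ whenever that column contains NA, because rowSums(..., na.rm=TRUE) turns an NA row into 0 before averaging, while mean(..., na.rm=TRUE) drops it:

```r
# Toy vector standing in for a single N.REGION.nt.nb column with one missing value.
n.region <- c(4, NA, 2)

mean(rowSums(cbind(n.region), na.rm = TRUE))  # (4 + 0 + 2) / 3 = 2
mean(n.region, na.rm = TRUE)                  # (4 + 2) / 2     = 3
```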
