argalaxy_tools: comparison of report_clonality/RScript.r @ 26:28fbbdfd7a87 (draft)

field | value |
---|---|
description | Uploaded |
author | davidvanzessen |
date | Mon, 13 Feb 2017 09:08:46 -0500 |
parents | 94765af0db1f |
children | 1f83e14f173b |

comparison

25:94765af0db1f (parent) | 26:28fbbdfd7a87 (this revision) |
---|---|
612 | 612 |
613 coincidence.table = data.frame(table(res$type)) | 613 coincidence.table = data.frame(table(res$type)) |
614 colnames(coincidence.table) = c("Coincidence Type", "Raw Coincidence Freq") | 614 colnames(coincidence.table) = c("Coincidence Type", "Raw Coincidence Freq") |
615 write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 615 write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
616 } | 616 } |
617 } else if(clonality_method == "old") { | 617 } |
618 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")]) | 618 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")]) |
619 | 619 |
620 #write files for every coincidence group of >1 | 620 #write files for every coincidence group of >1 |
621 samples = unique(clonalFreq$Sample) | 621 samples = unique(clonalFreq$Sample) |
622 for(sample in samples){ | 622 for(sample in samples){ |
623 clonalFreqSample = clonalFreq[clonalFreq$Sample == sample,] | 623 clonalFreqSample = clonalFreq[clonalFreq$Sample == sample,] |
624 if(max(clonalFreqSample$Type) > 1){ | 624 if(max(clonalFreqSample$Type) > 1){ |
625 for(i in 2:max(clonalFreqSample$Type)){ | 625 for(i in 2:max(clonalFreqSample$Type)){ |
626 clonalFreqSampleType = clonalFreqSample[clonalFreqSample$Type == i,] | 626 clonalFreqSampleType = clonalFreqSample[clonalFreqSample$Type == i,] |
627 clonalityFrame.sub = clonalityFrame[clonalityFrame$clonaltype %in% clonalFreqSampleType$clonaltype,] | 627 clonalityFrame.sub = clonalityFrame[clonalityFrame$clonaltype %in% clonalFreqSampleType$clonaltype,] |
628 clonalityFrame.sub = clonalityFrame.sub[order(clonalityFrame.sub$clonaltype),] | 628 clonalityFrame.sub = clonalityFrame.sub[order(clonalityFrame.sub$clonaltype),] |
629 write.table(clonalityFrame.sub, file=paste("coincidences_", sample, "_", i, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 629 write.table(clonalityFrame.sub, file=paste("coincidences_", sample, "_", i, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
630 } | 630 } |
631 } | 631 } |
632 } | 632 } |
633 | 633 |
634 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) | 634 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) |
635 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count | 635 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count |
636 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) | 636 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) |
637 clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample") | 637 clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample") |
638 | 638 |
639 ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15') | 639 ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15') |
640 tcct = textConnection(ct) | 640 tcct = textConnection(ct) |
641 CT = read.table(tcct, sep="\t", header=TRUE) | 641 CT = read.table(tcct, sep="\t", header=TRUE) |
642 close(tcct) | 642 close(tcct) |
643 clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T) | 643 clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T) |
644 clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight | 644 clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight |
645 | 645 |
646 ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonaltype")]) | 646 ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonaltype")]) |
647 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")]) | 647 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")]) |
648 clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads) | 648 clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads) |
649 ReplicateReads$Reads = as.numeric(ReplicateReads$Reads) | 649 ReplicateReads$Reads = as.numeric(ReplicateReads$Reads) |
650 ReplicateReads$squared = as.numeric(ReplicateReads$Reads * ReplicateReads$Reads) | 650 ReplicateReads$squared = as.numeric(ReplicateReads$Reads * ReplicateReads$Reads) |
651 | 651 |
652 ReplicatePrint <- function(dat){ | 652 ReplicatePrint <- function(dat){ |
653 write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | 653 write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".txt", sep=""), sep="\t",quote=F,na="-",row.names=F,col.names=F) |
654 } | 654 } |
655 | 655 |
656 ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) | 656 ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) |
657 lapply(ReplicateSplit, FUN=ReplicatePrint) | 657 lapply(ReplicateSplit, FUN=ReplicatePrint) |
658 | 658 |
659 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")]) | 659 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")]) |
660 clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T) | 660 clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T) |
661 | 661 |
662 ReplicateSumPrint <- function(dat){ | 662 ReplicateSumPrint <- function(dat){ |
663 write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | 663 write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".txt", sep=""), sep="\t",quote=F,na="-",row.names=F,col.names=F) |
664 } | 664 } |
665 | 665 |
666 ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) | 666 ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) |
667 lapply(ReplicateSumSplit, FUN=ReplicateSumPrint) | 667 lapply(ReplicateSumSplit, FUN=ReplicateSumPrint) |
668 | 668 |
669 clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")]) | 669 clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")]) |
670 clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T) | 670 clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T) |
671 clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow | 671 clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow |
672 clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2) | 672 clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2) |
673 clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1) | 673 clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1) |
674 | 674 |
675 ClonalityScorePrint <- function(dat){ | 675 ClonalityScorePrint <- function(dat){ |
676 write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | 676 write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".txt", sep=""), sep="\t",quote=F,na="-",row.names=F,col.names=F) |
677 } | 677 } |
678 | 678 |
679 clonalityScore = clonalFreqCount[c("Sample", "Result")] | 679 clonalityScore = clonalFreqCount[c("Sample", "Result")] |
680 clonalityScore = unique(clonalityScore) | 680 clonalityScore = unique(clonalityScore) |
681 | 681 |
682 clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"]) | 682 clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"]) |
683 lapply(clonalityScoreSplit, FUN=ClonalityScorePrint) | 683 lapply(clonalityScoreSplit, FUN=ClonalityScorePrint) |
684 | 684 |
685 clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")] | 685 clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")] |
686 | 686 |
687 | 687 |
688 | 688 |
689 ClonalityOverviewPrint <- function(dat){ | 689 ClonalityOverviewPrint <- function(dat){ |
690 dat = dat[order(dat[,2]),] | 690 dat = dat[order(dat[,2]),] |
691 write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | 691 write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".txt", sep=""), sep="\t",quote=F,na="-",row.names=F,col.names=F) |
692 } | 692 } |
693 | 693 |
694 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) | 694 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) |
695 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) | 695 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) |
696 } | 696 |
697 } | 697 } |
698 | 698 |
699 bak = PRODF | 699 bak = PRODF |
700 bakun = UNPROD | 700 bakun = UNPROD |
701 | 701 |
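
The hunk above is the legacy ("old") clonality branch: clonal types seen in more than one replicate count as coincidences, each weighted by the number of replicate pairs it spans (the hard-coded Type/Weight table 2→1, 3→3, 4→6, 5→10, 6→15 is choose(Type, 2)), and the score is the summed weights plus one divided by the number of possible read pairs across replicates plus one. The revision itself only switches the per-sample output files from comma-separated .csv to tab-separated .txt. A minimal sketch of the score on invented data, with choose(Type, 2) standing in for the fixed weight table:

```r
# Sketch only: clonality score as in the hunk above, computed on toy data.
library(data.table)

# One row per clonal type per replicate (invented values, not from the tool).
toy = data.frame(Sample     = "S1",
                 Replicate  = c(1, 1, 2, 2, 3),
                 clonaltype = c("A", "B", "A", "C", "A"))

# Coincidence type = number of replicates in which a clonal type occurs.
freq = data.frame(data.table(toy)[, list(Type = .N), by = c("Sample", "clonaltype")])

# Weight per clonal type = number of replicate pairs; choose(1, 2) is 0,
# so clonal types seen in a single replicate do not contribute.
numerator = sum(choose(freq$Type, 2))

# Denominator = number of possible read pairs across replicates.
reads = data.frame(data.table(toy)[, list(Reads = .N), by = c("Sample", "Replicate")])
reads_sum   = sum(reads$Reads)
denominator = (reads_sum^2 - sum(reads$Reads^2)) / 2

clonality_score = (numerator + 1) / (denominator + 1)
print(clonality_score)  # 4/9 for this toy example
```
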
722 } | 722 } |
723 } | 723 } |
724 | 724 |
725 PRODF.with.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) > 2,] | 725 PRODF.with.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) > 2,] |
726 PRODF.no.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) < 4,] | 726 PRODF.no.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) < 4,] |
| 727 write.table(PRODF.no.D, "productive_no_D.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) |
727 | 728 |
728 UNPROD.with.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) > 2,] | 729 UNPROD.with.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) > 2,] |
729 UNPROD.no.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) < 4,] | 730 UNPROD.no.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) < 4,] |
| 731 write.table(UNPROD.no.D, "unproductive_no_D.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) |
730 | 732 |
731 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) } | 733 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) } |
732 | |
733 print("---- table prod.with.d cdr3.length ----") | |
734 print(table(PRODF.with.D$CDR3.Length, useNA="ifany")) | |
735 print(median(PRODF.with.D$CDR3.Length, na.rm=T)) | |
736 | 734 |
737 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, | 735 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, |
738 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 736 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
739 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 737 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
740 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 738 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), |
770 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))), | 768 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))), |
771 by=c("Sample")]) | 769 by=c("Sample")]) |
772 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 770 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
773 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 771 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
774 | 772 |
775 print("---- table unprod.with.d cdr3.length ----") | |
776 print(table(UNPROD.with.D$CDR3.Length, useNA="ifany")) | |
777 print(median(UNPROD.with.D$CDR3.Length, na.rm=T)) | |
778 | |
779 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, | 773 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, |
780 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 774 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
781 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 775 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
782 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 776 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), |
783 P2=mean(.SD$P5D.nt.nb, na.rm=T), | 777 P2=mean(.SD$P5D.nt.nb, na.rm=T), |
817 #---------------- again for no-D | 811 #---------------- again for no-D |
818 | 812 |
819 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, | 813 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, |
820 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 814 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
821 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 815 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
822 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 816 N1=mean(.SD$N.REGION.nt.nb, na.rm=T), |
823 P2=mean(.SD$P5J.nt.nb, na.rm=T), | 817 P2=mean(.SD$P5J.nt.nb, na.rm=T), |
824 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 818 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
825 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 819 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
826 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 820 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), |
827 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 821 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
828 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 822 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), |
829 by=c("Sample")]) | 823 by=c("Sample")]) |
830 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 824 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
831 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 825 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
832 | 826 |
833 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, | 827 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, |
834 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 828 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
835 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 829 P1=num_median(.SD$P3V.nt.nb, na.rm=T), |
836 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 830 N1=median(.SD$N.REGION.nt.nb, na.rm=T), |
837 P2=num_median(.SD$P5J.nt.nb, na.rm=T), | 831 P2=num_median(.SD$P5J.nt.nb, na.rm=T), |
838 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 832 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
839 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 833 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
840 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 834 Total.N=median(.SD$N.REGION.nt.nb, na.rm=T), |
841 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 835 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
842 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 836 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), |
843 by=c("Sample")]) | 837 by=c("Sample")]) |
844 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 838 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
845 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 839 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
846 | 840 |
| 841 print(paste("mean N:", mean(UNPROD.no.D$N.REGION.nt.nb, na.rm=T))) |
| 842 |
847 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, | 843 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, |
848 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 844 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
849 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 845 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
850 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 846 N1=mean(.SD$N.REGION.nt.nb, na.rm=T), |
851 P2=mean(.SD$P5J.nt.nb, na.rm=T), | 847 P2=mean(.SD$P5J.nt.nb, na.rm=T), |
852 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 848 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
853 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 849 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
854 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 850 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), |
855 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 851 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
856 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 852 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), |
857 by=c("Sample")]) | 853 by=c("Sample")]) |
858 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 854 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
859 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 855 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
860 | 856 |
| 857 print(paste("median N:", num_median(UNPROD.no.D$N.REGION.nt.nb, na.rm=T))) |
| 858 |
861 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, | 859 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, |
862 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 860 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
863 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 861 P1=num_median(.SD$P3V.nt.nb, na.rm=T), |
864 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 862 N1=median(.SD$N.REGION.nt.nb, na.rm=T), |
865 P2=num_median(.SD$P5J.nt.nb, na.rm=T), | 863 P2=num_median(.SD$P5J.nt.nb, na.rm=T), |
866 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 864 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
867 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 865 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
868 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 866 Total.N=median(.SD$N.REGION.nt.nb, na.rm=T), |
869 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 867 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
870 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 868 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), |
871 by=c("Sample")]) | 869 by=c("Sample")]) |
872 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 870 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
873 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 871 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
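
In the junction-analysis hunks above, the revision replaces the single-column rowSums() constructions for N.REGION.nt.nb with a plain mean() or median() and adds print() checks of the mean and median N for UNPROD.no.D. The two forms are not equivalent when values are missing: with na.rm=T, rowSums() over one column turns an NA into 0 and that 0 then enters the average, while mean() or median() with na.rm=T drops the row. The diff does not state that this motivated the change; the sketch below only illustrates the difference on toy values.

```r
# Sketch only: single-column rowSums() versus a plain mean when NAs are present.
n = c(4, NA, 2)

mean(rowSums(cbind(N.REGION.nt.nb = n), na.rm = TRUE))  # NA becomes 0: (4 + 0 + 2) / 3 = 2
mean(n, na.rm = TRUE)                                   # NA is dropped: (4 + 2) / 2     = 3
```
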
958 clonaltype = clonaltype[-which(clonaltype == "Sample")] | 956 clonaltype = clonaltype[-which(clonaltype == "Sample")] |
959 | 957 |
960 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[clonaltype], sep = ":")) | 958 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[clonaltype], sep = ":")) |
961 clonaltype.in.replicates = clonaltype.in.replicates[,c("clonaltype","Replicate", "ID", "Sequence", "Sample")] | 959 clonaltype.in.replicates = clonaltype.in.replicates[,c("clonaltype","Replicate", "ID", "Sequence", "Sample")] |
962 | 960 |
963 print(head(clonaltype.in.replicates)) | |
964 | |
965 clonaltype.counts = data.frame(table(clonaltype.in.replicates$clonaltype)) | 961 clonaltype.counts = data.frame(table(clonaltype.in.replicates$clonaltype)) |
966 names(clonaltype.counts) = c("clonaltype", "coincidence") | 962 names(clonaltype.counts) = c("clonaltype", "coincidence") |
967 | 963 |
968 print(head(clonaltype.counts)) | |
969 | |
970 clonaltype.counts = clonaltype.counts[clonaltype.counts$coincidence > 1,] | 964 clonaltype.counts = clonaltype.counts[clonaltype.counts$coincidence > 1,] |
971 | |
972 head(clonaltype.counts) | |
973 | 965 |
974 clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$clonaltype %in% clonaltype.counts$clonaltype,] | 966 clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$clonaltype %in% clonaltype.counts$clonaltype,] |
975 clonaltype.in.replicates = merge(clonaltype.in.replicates, clonaltype.counts, by="clonaltype") | 967 clonaltype.in.replicates = merge(clonaltype.in.replicates, clonaltype.counts, by="clonaltype") |
976 print(head(clonaltype.in.replicates)) | |
977 clonaltype.in.replicates = clonaltype.in.replicates[order(clonaltype.in.replicates$clonaltype),c("coincidence","clonaltype", "Sample", "Replicate", "ID", "Sequence")] | 968 clonaltype.in.replicates = clonaltype.in.replicates[order(clonaltype.in.replicates$clonaltype),c("coincidence","clonaltype", "Sample", "Replicate", "ID", "Sequence")] |
978 | 969 |
979 write.table(clonaltype.in.replicates, "clonaltypes_replicates.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) | 970 write.table(clonaltype.in.replicates, "clonaltypes_replicates.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) |
980 | 971 |
981 | 972 |
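
The final hunk drops print(head(...)) debugging calls around the replicate-overlap step, which pastes the identifying columns into a clonaltype string, counts in how many replicates each clonal type occurs, and writes the shared ones to clonaltypes_replicates.txt. A minimal sketch of that step on invented data, where the identifying columns (Top.V.Gene, CDR3.Seq) are assumptions because the actual column list is built earlier in the script and is not visible here:

```r
# Sketch only: replicate overlap as in the final hunk, with hypothetical
# identifying columns (Top.V.Gene, CDR3.Seq) and toy values.
reps = data.frame(Sample     = "S1",
                  Replicate  = c(1, 2, 3),
                  ID         = c("seq1", "seq2", "seq3"),
                  Sequence   = c("ACGT", "ACGT", "TTGA"),
                  Top.V.Gene = c("IGHV1-2", "IGHV1-2", "IGHV3-23"),
                  CDR3.Seq   = c("CARDYW", "CARDYW", "CAKGGW"),
                  stringsAsFactors = FALSE)

# Clonal type = identifying columns pasted together, as in the script.
clonaltype = c("Top.V.Gene", "CDR3.Seq")
reps$clonaltype = do.call(paste, c(reps[clonaltype], sep = ":"))

# Count in how many rows (replicates) each clonal type occurs.
clonaltype.counts = data.frame(table(reps$clonaltype))
names(clonaltype.counts) = c("clonaltype", "coincidence")
clonaltype.counts = clonaltype.counts[clonaltype.counts$coincidence > 1, ]

# Keep and annotate only the clonal types found in more than one replicate.
shared = merge(reps[reps$clonaltype %in% clonaltype.counts$clonaltype, ],
               clonaltype.counts, by = "clonaltype")
shared = shared[order(shared$clonaltype),
                c("coincidence", "clonaltype", "Sample", "Replicate", "ID", "Sequence")]
print(shared)
```
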