Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 25:94765af0db1f draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Thu, 09 Feb 2017 07:20:09 -0500 |
| parents | d5d203d38c8a |
| children | 28fbbdfd7a87 |
comparison
equal
deleted
inserted
replaced
| 24:d5d203d38c8a | 25:94765af0db1f |
|---|---|
| 156 | 156 |
| 157 | 157 |
| 158 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive | 158 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive |
| 159 write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T) | 159 write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 160 #write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T) | 160 #write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T) |
| 161 write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T) | 161 write.table(UNPROD, "allUnproductive.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 162 | 162 |
| 163 print("SAMPLE TABLE:") | 163 print("SAMPLE TABLE:") |
| 164 print(table(PRODF$Sample)) | 164 print(table(PRODF$Sample)) |
| 165 | 165 |
| 166 #write the samples to a file | 166 #write the samples to a file |
| 695 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) | 695 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) |
| 696 } | 696 } |
| 697 } | 697 } |
| 698 | 698 |
| 699 bak = PRODF | 699 bak = PRODF |
| 700 bakun = UNPROD | |
| 700 | 701 |
| 701 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") | 702 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") |
| 702 if(all(imgtcolumns %in% colnames(inputdata))) | 703 if(all(imgtcolumns %in% colnames(inputdata))) |
| 703 { | 704 { |
| 704 print("found IMGT columns, running junction analysis") | 705 print("found IMGT columns, running junction analysis") |
| 722 } | 723 } |
| 723 | 724 |
| 724 PRODF.with.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) > 2,] | 725 PRODF.with.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) > 2,] |
| 725 PRODF.no.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) < 4,] | 726 PRODF.no.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) < 4,] |
| 726 | 727 |
| 728 UNPROD.with.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) > 2,] | |
| 729 UNPROD.no.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) < 4,] | |
| 730 | |
| 727 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) } | 731 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) } |
| 728 | 732 |
| 733 print("---- table prod.with.d cdr3.length ----") | |
| 734 print(table(PRODF.with.D$CDR3.Length, useNA="ifany")) | |
| 735 print(median(PRODF.with.D$CDR3.Length, na.rm=T)) | |
| 736 | |
| 729 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, | 737 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, |
| 730 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 738 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 731 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 739 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
| 732 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 740 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), |
| 733 P2=mean(.SD$P5D.nt.nb, na.rm=T), | 741 P2=mean(.SD$P5D.nt.nb, na.rm=T), |
| 738 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 746 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
| 739 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 747 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 740 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 748 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 741 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 749 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
| 742 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 750 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 743 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 751 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))), |
| 744 by=c("Sample")]) | 752 by=c("Sample")]) |
| 745 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 753 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 746 write.table(newData, "junctionAnalysisProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 754 write.table(newData, "junctionAnalysisProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 747 | 755 |
| 748 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, | 756 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, |
| 757 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 765 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
| 758 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 766 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 759 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 767 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 760 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 768 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
| 761 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 769 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 762 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 770 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))), |
| 763 by=c("Sample")]) | 771 by=c("Sample")]) |
| 764 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 772 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 765 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 773 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 766 | 774 |
| 767 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, | 775 print("---- table unprod.with.d cdr3.length ----") |
| 776 print(table(UNPROD.with.D$CDR3.Length, useNA="ifany")) | |
| 777 print(median(UNPROD.with.D$CDR3.Length, na.rm=T)) | |
| 778 | |
| 779 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, | |
| 768 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 780 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 769 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 781 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
| 770 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 782 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), |
| 771 P2=mean(.SD$P5D.nt.nb, na.rm=T), | 783 P2=mean(.SD$P5D.nt.nb, na.rm=T), |
| 772 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 784 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), |
| 776 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 788 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
| 777 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 789 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 778 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 790 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 779 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 791 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
| 780 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 792 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 781 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 793 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), |
| 782 by=c("Sample")]) | 794 by=c("Sample")]) |
| 783 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 795 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 784 write.table(newData, "junctionAnalysisUnProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 796 write.table(newData, "junctionAnalysisUnProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 785 | 797 |
| 786 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, | 798 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, |
| 787 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 799 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 788 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 800 P1=num_median(.SD$P3V.nt.nb, na.rm=T), |
| 789 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), | 801 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), |
| 790 P2=num_median(.SD$P5D.nt.nb, na.rm=T), | 802 P2=num_median(.SD$P5D.nt.nb, na.rm=T), |
| 791 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 803 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), |
| 795 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 807 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
| 796 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 808 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 797 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 809 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 798 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 810 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
| 799 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 811 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 800 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 812 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), |
| 801 by=c("Sample")]) | 813 by=c("Sample")]) |
| 802 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 814 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 803 write.table(newData, "junctionAnalysisUnProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 815 write.table(newData, "junctionAnalysisUnProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 804 | 816 |
| 805 #---------------- again for no-D | 817 #---------------- again for no-D |
| 811 P2=mean(.SD$P5J.nt.nb, na.rm=T), | 823 P2=mean(.SD$P5J.nt.nb, na.rm=T), |
| 812 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 824 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 813 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 825 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 814 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 826 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), |
| 815 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 827 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 816 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 828 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), |
| 817 by=c("Sample")]) | 829 by=c("Sample")]) |
| 818 print(newData) | |
| 819 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 830 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 820 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 831 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 821 | 832 |
| 822 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, | 833 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, |
| 823 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 834 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 826 P2=num_median(.SD$P5J.nt.nb, na.rm=T), | 837 P2=num_median(.SD$P5J.nt.nb, na.rm=T), |
| 827 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 838 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 828 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 839 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 829 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 840 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), |
| 830 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 841 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 831 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 842 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), |
| 832 by=c("Sample")]) | 843 by=c("Sample")]) |
| 833 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 844 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 834 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 845 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 835 | 846 |
| 836 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, | 847 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, |
| 837 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 848 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 838 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 849 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
| 839 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 850 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), |
| 840 P2=mean(.SD$P5J.nt.nb, na.rm=T), | 851 P2=mean(.SD$P5J.nt.nb, na.rm=T), |
| 841 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 852 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 842 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 853 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 843 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 854 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), |
| 844 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 855 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 845 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 856 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), |
| 846 by=c("Sample")]) | 857 by=c("Sample")]) |
| 847 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 858 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 848 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 859 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 849 | 860 |
| 850 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, | 861 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, |
| 851 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 862 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 852 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 863 P1=num_median(.SD$P3V.nt.nb, na.rm=T), |
| 853 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 864 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), |
| 854 P2=num_median(.SD$P5J.nt.nb, na.rm=T), | 865 P2=num_median(.SD$P5J.nt.nb, na.rm=T), |
| 855 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 866 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 856 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 867 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 857 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), | 868 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), |
| 858 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 869 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 859 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 870 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), |
| 860 by=c("Sample")]) | 871 by=c("Sample")]) |
| 861 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 872 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 862 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 873 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 863 } | 874 } |
| 864 | 875 |
| 865 PRODF = bak | 876 PRODF = bak |
| 877 UNPROD = bakun | |
| 866 | 878 |
| 867 | 879 |
| 868 # ---------------------- D reading frame ---------------------- | 880 # ---------------------- D reading frame ---------------------- |
| 869 | 881 |
| 870 D.REGION.reading.frame = PRODF[,c("Sample", "D.REGION.reading.frame")] | 882 D.REGION.reading.frame = PRODF[,c("Sample", "D.REGION.reading.frame")] |
| 937 # ---------------------- AA median CDR3 length ---------------------- | 949 # ---------------------- AA median CDR3 length ---------------------- |
| 938 | 950 |
| 939 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T), na.rm=T))), by=c("Sample")]) | 951 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T), na.rm=T))), by=c("Sample")]) |
| 940 write.table(median.aa.l, "AAMedianBySample.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 952 write.table(median.aa.l, "AAMedianBySample.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 941 | 953 |
| 954 | |
| 955 #generate the "Sequences that are present in more than one replicate" dataset | |
| 956 clonaltype.in.replicates = inputdata | |
| 957 clonaltype = unlist(strsplit(clonaltype, ",")) | |
| 958 clonaltype = clonaltype[-which(clonaltype == "Sample")] | |
| 959 | |
| 960 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[clonaltype], sep = ":")) | |
| 961 clonaltype.in.replicates = clonaltype.in.replicates[,c("clonaltype","Replicate", "ID", "Sequence", "Sample")] | |
| 962 | |
| 963 print(head(clonaltype.in.replicates)) | |
| 964 | |
| 965 clonaltype.counts = data.frame(table(clonaltype.in.replicates$clonaltype)) | |
| 966 names(clonaltype.counts) = c("clonaltype", "coincidence") | |
| 967 | |
| 968 print(head(clonaltype.counts)) | |
| 969 | |
| 970 clonaltype.counts = clonaltype.counts[clonaltype.counts$coincidence > 1,] | |
| 971 | |
| 972 head(clonaltype.counts) | |
| 973 | |
| 974 clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$clonaltype %in% clonaltype.counts$clonaltype,] | |
| 975 clonaltype.in.replicates = merge(clonaltype.in.replicates, clonaltype.counts, by="clonaltype") | |
| 976 print(head(clonaltype.in.replicates)) | |
| 977 clonaltype.in.replicates = clonaltype.in.replicates[order(clonaltype.in.replicates$clonaltype),c("coincidence","clonaltype", "Sample", "Replicate", "ID", "Sequence")] | |
| 978 | |
| 979 write.table(clonaltype.in.replicates, "clonaltypes_replicates.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) | |
| 980 | |
| 981 | |
| 982 | |
| 983 | |
| 984 | |
| 985 | |
| 986 | |
| 987 | |
| 988 | |
| 989 | |
| 990 | |
| 991 | |
| 992 | |
| 993 | |
| 994 | |
| 995 | |
| 996 | |
| 997 | |
| 998 | |
| 999 | |
| 1000 | |
| 1001 | |
| 1002 | |
| 1003 | |
| 1004 |
