Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 38:b6936fb52ab9 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Wed, 19 Apr 2017 10:21:01 -0400 |
| parents | f37e072affc0 |
| children | 106275b54470 |
comparison
equal
deleted
inserted
replaced
| 37:f37e072affc0 | 38:b6936fb52ab9 |
|---|---|
| 766 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 766 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
| 767 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 767 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 768 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 768 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 769 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 769 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
| 770 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 770 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 771 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))), | 771 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
| 772 by=c("Sample")]) | 772 by=c("Sample")]) |
| 773 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 773 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 774 write.table(newData, "junctionAnalysisProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 774 write.table(newData, "junctionAnalysisProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 775 | 775 |
| 776 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, | 776 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, |
| 785 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 785 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
| 786 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 786 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 787 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 787 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 788 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 788 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
| 789 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 789 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 790 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))), | 790 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
| 791 by=c("Sample")]) | 791 by=c("Sample")]) |
| 792 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 792 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 793 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 793 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 794 | 794 |
| 795 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, | 795 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, |
| 804 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 804 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
| 805 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 805 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 806 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 806 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 807 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 807 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
| 808 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 808 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 809 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 809 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
| 810 by=c("Sample")]) | 810 by=c("Sample")]) |
| 811 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 811 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 812 write.table(newData, "junctionAnalysisUnProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 812 write.table(newData, "junctionAnalysisUnProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 813 | 813 |
| 814 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, | 814 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, |
| 823 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 823 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
| 824 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 824 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 825 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 825 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 826 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 826 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
| 827 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 827 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 828 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 828 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
| 829 by=c("Sample")]) | 829 by=c("Sample")]) |
| 830 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 830 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 831 write.table(newData, "junctionAnalysisUnProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 831 write.table(newData, "junctionAnalysisUnProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 832 | 832 |
| 833 #---------------- again for no-D | 833 #---------------- again for no-D |
| 839 P2=mean(.SD$P5J.nt.nb, na.rm=T), | 839 P2=mean(.SD$P5J.nt.nb, na.rm=T), |
| 840 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 840 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 841 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 841 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 842 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), | 842 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), |
| 843 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 843 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 844 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 844 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
| 845 by=c("Sample")]) | 845 by=c("Sample")]) |
| 846 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 846 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 847 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 847 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 848 | 848 |
| 849 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, | 849 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, |
| 853 P2=num_median(.SD$P5J.nt.nb, na.rm=T), | 853 P2=num_median(.SD$P5J.nt.nb, na.rm=T), |
| 854 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 854 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 855 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 855 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 856 Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T), | 856 Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T), |
| 857 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 857 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 858 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 858 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
| 859 by=c("Sample")]) | 859 by=c("Sample")]) |
| 860 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 860 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 861 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 861 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 862 | |
| 863 print(paste("mean N:", mean(UNPROD.no.D$N.REGION.nt.nb, na.rm=T))) | |
| 864 | 862 |
| 865 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, | 863 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, |
| 866 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 864 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 867 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 865 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
| 868 N1=mean(.SD$N.REGION.nt.nb, na.rm=T), | 866 N1=mean(.SD$N.REGION.nt.nb, na.rm=T), |
| 869 P2=mean(.SD$P5J.nt.nb, na.rm=T), | 867 P2=mean(.SD$P5J.nt.nb, na.rm=T), |
| 870 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 868 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 871 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 869 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 872 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), | 870 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), |
| 873 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 871 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 874 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 872 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
| 875 by=c("Sample")]) | 873 by=c("Sample")]) |
| 876 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 874 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 877 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 875 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 878 | 876 |
| 879 print(paste("median N:", num_median(UNPROD.no.D$N.REGION.nt.nb, na.rm=T))) | |
| 880 | 877 |
| 881 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, | 878 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, |
| 882 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 879 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 883 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 880 P1=num_median(.SD$P3V.nt.nb, na.rm=T), |
| 884 N1=num_median(.SD$N.REGION.nt.nb, na.rm=T), | 881 N1=num_median(.SD$N.REGION.nt.nb, na.rm=T), |
| 885 P2=num_median(.SD$P5J.nt.nb, na.rm=T), | 882 P2=num_median(.SD$P5J.nt.nb, na.rm=T), |
| 886 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 883 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 887 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 884 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
| 888 Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T), | 885 Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T), |
| 889 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 886 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
| 890 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 887 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
| 891 by=c("Sample")]) | 888 by=c("Sample")]) |
| 892 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 889 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 893 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 890 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 894 } | 891 } |
| 895 | 892 |
| 924 png("DReadingFrame.png") | 921 png("DReadingFrame.png") |
| 925 D.REGION.reading.frame | 922 D.REGION.reading.frame |
| 926 dev.off() | 923 dev.off() |
| 927 | 924 |
| 928 ggsave("DReadingFrame.pdf", D.REGION.reading.frame) | 925 ggsave("DReadingFrame.pdf", D.REGION.reading.frame) |
| 929 | |
| 930 | 926 |
| 931 # ---------------------- AA composition in CDR3 ---------------------- | 927 # ---------------------- AA composition in CDR3 ---------------------- |
| 932 | 928 |
| 933 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] | 929 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] |
| 934 | 930 |
| 975 write.table(median.aa.l, "AAMedianBySample.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 971 write.table(median.aa.l, "AAMedianBySample.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
| 976 | 972 |
| 977 | 973 |
| 978 #generate the "Sequences that are present in more than one replicate" dataset | 974 #generate the "Sequences that are present in more than one replicate" dataset |
| 979 clonaltype.in.replicates = inputdata | 975 clonaltype.in.replicates = inputdata |
| 976 clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$Functionality %in% c("productive (see comment)","productive"),] | |
| 980 clonaltype.in.replicates = na.omit(clonaltype.in.replicates) | 977 clonaltype.in.replicates = na.omit(clonaltype.in.replicates) |
| 981 clonaltype = unlist(strsplit(clonaltype, ",")) | 978 clonaltype = unlist(strsplit(clonaltype, ",")) |
| 982 | 979 |
| 983 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[c(clonaltype, "Replicate")], sep = ":")) | 980 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[c(clonaltype, "Replicate")], sep = ":")) |
| 984 | 981 |
| 1021 | 1018 |
| 1022 | 1019 |
| 1023 | 1020 |
| 1024 | 1021 |
| 1025 | 1022 |
| 1026 | |
| 1027 |
