annotate batchcorrection-57edfd3943ab/Normalisation_QCpool.r @ 3:73892ef177e3 draft

Uploaded
author melpetera
date Tue, 02 May 2017 09:47:22 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
1 # Author: jfmartin
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
2 # Modified by : mpetera
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
3 ###############################################################################
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
4 # Correction of analytical effects inter and intra batch on intensities using quality control pooled samples (QC-pools)
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
5 # according to the algorithm mentioned by Van der Kloet (J Prot Res 2009).
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
6 # Parameters : a dataframe of ion intensities and another of samples' metadata, which must contain at least the three following columns :
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
7 # "batch" to identify the batches of analyses ; need at least 3 QC-pools for linear adjustment and 8 for lo(w)ess adjustment
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
8 # "injectionOrder" integer defining the injection order of all samples : QC-pools and analysed samples
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
9 # "sampleType" indicates if defining a sample with "sample" or a QC-pool with "pool"
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
10 # NO MISSING DATA are allowed
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
11 # Version 0.91 insertion of ok_norm function to assess correction feasibility
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
12 # Version 0.92 insertion of slope test in ok_norm
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
13 # Version 0.93 name of log file define as a parameter of the correction function
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
14 # Version 0.94 Within a batch, test if all QCpools or samples values = 0. Definition of an error code in ok_norm function (see function for details)
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
15 # Version 0.99 include non linear lowess correction.
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
16 # Version 1.00 the corrected result matrix is return transposed in Galaxy
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
17 # Version 1.01 standard deviation=0 instead of sum of value=0 is used to assess constant data in ok_norm function. Negative values in corrected matrix are converted to 0.
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
18 # Version 1.02 plotsituation create a result file with the error code of non execution of correction set by function ok_norm
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
19 # Version 1.03 fix bug in plot with "reg" option. suppression of ok_norm=4 condition if ok_norm function
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
20 # Version 2.00 Addition of loess function, correction indicator, plots ; modification of returned objects' format, some plots' displays and ok_norm ifelse format
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
21 # Version 2.01 Correction for pools negative values earlier in norm_QCpool
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
22 # Version 2.10 Script refreshing ; vocabulary adjustment ; span in parameters for lo(w)ess regression ; conditionning for third line ACP display ; order in loess display
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
23 # Version 2.11 ok1 and ok2 permutation (ok_norm) ; conditional display of regression (plotsituation) ; grouping of linked lignes + conditioning (normX) ; conditioning for CVplot
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
24 # Version 2.20 acplight function added from previous toolBox.R [# Version 1.01 "NA"-coding possibility added in acplight function]
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
25 # Version 2.30 addition of suppressWarnings() for known and controlled warnings ; suppression of one useless "cat" message ; change in Rdata names ; 'batch(es)' in cat
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
26 # Version 2.90 change in handling of generated negative and Inf values
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
27 # Version 2.91 Plot improvement
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
28
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
ok_norm <- function(qcp, qci, spl, spi, method) {
  # Decide, for one ion within one batch, whether batch correction is possible.
  #
  # qcp    : intensities of the QC-pools
  # qci    : injection order of the QC-pools
  # spl    : intensities of the samples
  # spi    : injection order of the samples
  # method : "linear" requires at least 3 QC-pools; any other value
  #          (lowess / loess) requires at least 8
  #
  # Returned code:
  #   0 : no preliminary-condition problem
  #   1 : standard deviation of QC-pools or samples = 0
  #   2 : insufficient number of QC-pools within the batch
  #   3 : significant difference (t-test, p < 0.01) between QC-pools' and samples' means
  #   5 : (linear only) slope ratio QC-pools/samples lower than -0.2
  #   6 : (linear only) the pool regression line is exactly 0 at a pool injection order,
  #       i.e. the correction denominator would be 0
  #       NOTE(review): earlier notes in this file list this condition as code 4,
  #       but the code has always returned 6 here; 6 is kept for compatibility.

  min_qc <- if (method == "linear") 3 else 8
  if (length(qcp) < min_qc) {
    return(2)
  }
  if (sd(qcp) == 0 || sd(spl) == 0) {
    return(1)
  }
  # Significant difference between samples and pools
  if (t.test(qcp, y = spl)$p.value < 0.01) {
    return(3)
  }
  # The remaining checks only concern the linear model
  if (method != "linear") {
    return(0)
  }

  # Both regressions are intensity ~ injection order, so that the two slopes
  # are expressed in the same unit before being compared.
  pool_fit <- lsfit(qci, qcp)
  sample_fit <- lsfit(spi, spl)
  ordori <- pool_fit$coefficients[1]
  penteB <- pool_fit$coefficients[2]
  penteS <- sample_fit$coefficients[2]

  # Avoid a null denominator in the linear correction
  if (any((penteB * qci) + ordori == 0, na.rm = TRUE)) {
    return(6)
  }
  # Opposite slopes between samples and pools
  if (penteB / penteS < -0.20) {
    return(5)
  }
  0
}
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
63
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
plotsituation <- function(x, nbid, outfic = "plot_regression.pdf", outres = "PreNormSummary.txt",
                          fact = "batch", span = "none") {
  # Check for all ions in every batch if linear or lo(w)ess correction is possible.
  # Uses ok_norm and writes a tab-separated file (outres) with the resulting codes,
  # and a pdf file (outfic) with plots of the linear, lowess and loess regression lines.
  #
  # x      : dataframe with ions in columns and samples in rows ; x is the result of the
  #          concatenation of the sample metadata file and the ions file
  # nbid   : number of sample description columns (id and factors) with at least :
  #          "batch", "injectionOrder", "sampleType"
  # outfic : name of the regression plots pdf file
  # outres : name of the summary file holding the ok_norm codes
  # fact   : factor to be used as categorical variable for plots
  # span   : "none" for default smoothing parameters, otherwise the span passed to
  #          loess (pools) and lowess (pools)

  # levels() is NULL on a non-factor column, which used to give zero batches;
  # make sure "batch" is a factor whatever way x was read.
  x$batch <- as.factor(x$batch)
  # Character design columns are turned into factors, as read.table did before R 4.0
  # (presumably what plot.design below relies on -- TODO confirm).
  for (nm in unique(c("sampleType", fact))) {
    if (is.character(x[[nm]])) {
      x[[nm]] <- as.factor(x[[nm]])
    }
  }

  col_names <- dimnames(x)[[2]]
  indfact <- which(col_names == fact)
  indtypsamp <- which(col_names == "sampleType")
  indbatch <- which(col_names == "batch")
  indinject <- which(col_names == "injectionOrder")
  lastIon <- dim(x)[2]
  nbi <- lastIon - nbid                  # number of ions = total columns - identifying columns
  batch_levels <- levels(x$batch)
  nbb <- length(batch_levels)            # number of batches
  nbs <- length(x$sampleType[x$sampleType == "sample"])  # number of "sample" rows
  indpool <- which(x$sampleType == "pool")               # QC-pools subscripts in x
  panel_rows <- ceiling((nbb + 2) / 3)   # one panel per batch + 2 global panels, 3 per row

  pdf(outfic, width = 27, height = 7 * panel_rows)
  on.exit(dev.off(), add = TRUE)         # close the device even if a fit or a plot fails
  cat(nbi," ions ",nbb," batch(es) \n")

  pre_bilan <- matrix(0, nrow = nbi, ncol = 3 * nbb)  # ok_norm results

  # Smoothing parameters do not depend on the ion nor on the batch
  if (span == "none") {
    span1 <- 1
    span2 <- 2 * length(indpool) / nbs
  } else {
    span1 <- span
    span2 <- span
  }

  for (p in seq_len(nbi)) {              # for each ion
    par(mfrow = c(panel_rows, 3), ask = FALSE, cex = 1.2)
    ion_col <- p + nbid
    labion <- col_names[ion_col]
    pools1 <- x[indpool, ion_col]
    cv_before <- sd(pools1) / mean(pools1)  # pools' CV before correction

    for (b in seq_len(nbb)) {            # for each batch
      xb <- data.frame(x[(x$batch == batch_levels[b]), c(indtypsamp, indinject, ion_col)])
      indpb <- which(xb$sampleType == "pool")    # QC-pools subscripts in the current batch
      indsp <- which(xb$sampleType == "sample")  # samples subscripts in the current batch
      indbt <- which(xb$sampleType == "sample" | xb$sampleType == "pool")  # pools + samples
      normLinearTest <- ok_norm(xb[indpb, 3], xb[indpb, 2], xb[indsp, 3], xb[indsp, 2], method = "linear")
      normLoessTest <- ok_norm(xb[indpb, 3], xb[indpb, 2], xb[indsp, 3], xb[indsp, 2], method = "loess")
      normLowessTest <- ok_norm(xb[indpb, 3], xb[indpb, 2], xb[indsp, 3], xb[indsp, 2], method = "lowess")
      pre_bilan[p, 3 * b - 2] <- normLinearTest
      pre_bilan[p, 3 * b - 1] <- normLoessTest
      pre_bilan[p, 3 * b] <- normLowessTest

      if (length(indpb) > 1) {
        resloess <- loess(xb[indpb, 3] ~ xb[indpb, 2], span = span1, degree = 2,
                          family = "gaussian", iterations = 4, surface = "direct")
        # the samples' curves always use the default smoothing parameters
        resloessSample <- loess(xb[indsp, 3] ~ xb[indsp, 2], span = 2 * length(indpool) / nbs, degree = 2,
                                family = "gaussian", iterations = 4, surface = "direct")
        reslowess <- lowess(xb[indpb, 2], xb[indpb, 3], f = span2)
        reslowessSample <- lowess(xb[indsp, 2], xb[indsp, 3])
        liminf <- min(xb[indbt, 3])
        limsup <- max(xb[indbt, 3])
        plot(xb[indsp, 2], xb[indsp, 3], pch = 16, main = paste(labion,"batch ",b),
             ylab = "intensity", xlab = "injection order", ylim = c(liminf, limsup))
        points(xb[indpb, 2], xb[indpb, 3], pch = 5)
        points(cbind(resloess$x, resloess$fitted)[order(resloess$x), ], type = "l", col = "green3")
        points(cbind(resloessSample$x, resloessSample$fitted)[order(resloessSample$x), ],
               type = "l", col = "green3", lty = 2)
        points(reslowess, type = "l", col = "red")
        points(reslowessSample, type = "l", col = "red", lty = 2)
        abline(lsfit(xb[indpb, 2], xb[indpb, 3]), col = "blue")
        abline(lsfit(xb[indsp, 2], xb[indsp, 3]), lty = 2, col = "blue")
        legend("topleft", c("pools","samples"), lty = c(1, 2), bty = "n")
        legend("topright", c("linear","lowess","loess"), lty = 1, col = c("blue","red","green3"), bty = "n")
      }
    }

    # global plots before correction
    minval <- min(x[ion_col])
    maxval <- max(x[ion_col])
    plot(x$injectionOrder, x[, ion_col], col = x$batch, ylim = c(minval, maxval), ylab = labion,
         main = paste0("before correction (CV for pools = ",round(cv_before, 2),")"))
    suppressWarnings(plot.design(x[c(indtypsamp, indbatch, indfact, ion_col)],
                                 main = "factors effect before correction"))
  }

  pre_bilan <- data.frame(pre_bilan)
  for (i in seq_len(nbb)) {
    colnames(pre_bilan)[3 * i - 2] <- paste("batch",i,"linear")
    colnames(pre_bilan)[3 * i - 1] <- paste("batch",i,"loess")
    colnames(pre_bilan)[3 * i] <- paste("batch",i,"lowess")
  }
  # first column is named "labion" in the output file
  bilan <- data.frame(labion = col_names[nbid + seq_len(nbi)], pre_bilan)
  write.table(bilan, file = outres, sep = "\t", row.names = FALSE, quote = FALSE)
}
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
136
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
137
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
normlowess <- function(xb, detail = "no", vref = 1, b, span = NULL) {
  # Correction function applied to 1 ion in 1 batch.
  # Uses a lowess regression computed on QC-pools to correct intensity values.
  #
  # xb     : dataframe for 1 ion in 1 batch, samples in rows ; columns are used by
  #          position : 1 = sampleType (read by name), 2 = injection order, 3 = intensity
  # detail : "reg" to draw the regression plot on the current graphics device
  # vref   : reference value (average of ion)
  # b      : batch subscript (only used in the plot title)
  # span   : smoother span given to lowess ; NULL means 2 * (number of pools / number of samples)
  #
  # Returns a list : norm.ion = corrected intensities (one per row of xb),
  #                  norm.ind = 1 if the lowess correction was applied, 0 otherwise.
  # Rows that are neither "pool" nor "sample" keep their initial value.
  indpb <- which(xb$sampleType == "pool")    # pools subscripts of current batch
  indsp <- which(xb$sampleType == "sample")  # samples subscripts of current batch
  indbt <- which(xb$sampleType == "sample" | xb$sampleType == "pool")  # pools + samples
  labion <- dimnames(xb)[[2]][3]
  newval <- xb[[3]]  # initialisation of corrected values = initial values
  ind <- 0           # initialisation of correction indicator
  normTodo <- ok_norm(xb[indpb, 3], xb[indpb, 2], xb[indsp, 3], xb[indsp, 2], method = "lowess")
  if (normTodo == 0) {
    if (length(span) == 0) {
      span2 <- 2 * length(indpb) / length(indsp)
    } else {
      span2 <- span
    }
    reslowess <- lowess(xb[indpb, 2], xb[indpb, 3], f = span2)  # lowess regression with QC-pools
    # Iterate on the actual row subscripts of pools and samples, so that rows of
    # another type are skipped instead of shifting the subscripts.
    for (j in indbt) {
      if (xb$sampleType[j] == "pool") {
        # lowess() returns its fit sorted by x : look the pool up by injection order
        # rather than by its position among the pools.
        pos <- match(xb[j, 2], reslowess$x)
        if (reslowess$y[pos] == 0) reslowess$y[pos] <- 1
        newval[j] <- (vref * xb[j, 3]) / reslowess$y[pos]
      } else {
        # for samples, the correction value is the fit at the nearest QC-pool
        # (first one in injection order in case of tie)
        fit <- reslowess$y[which.min(abs(reslowess$x - xb[j, 2]))]
        if (fit <= 0) fit <- vref  # no modification of initial value
        newval[j] <- (vref * xb[j, 3]) / fit
      }
    }
    if (detail == "reg") {
      liminf <- min(xb[indbt, 3])
      limsup <- max(xb[indbt, 3])
      plot(xb[indsp, 2], xb[indsp, 3], pch = 16, main = paste(labion,"batch ",b),
           ylab = "intensity", xlab = "injection order", ylim = c(liminf, limsup))
      points(xb[indpb, 2], xb[indpb, 3], pch = 5)
      points(reslowess, type = "l", col = "red")
    }
    ind <- 1
  } else {
    # if ok_norm != 0, we perform a correction based on batch samples average
    moySample <- mean(xb[indsp, 3])
    if (moySample == 0) moySample <- 1
    newval[indsp] <- (vref * xb[indsp, 3]) / moySample
    if (length(indpb) > 0) {
      moypool <- mean(xb[indpb, 3])
      if (moypool == 0) moypool <- 1
      newval[indpb] <- (vref * xb[indpb, 3]) / moypool
    }
  }
  list(norm.ion = newval, norm.ind = ind)
}
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
185
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
normlinear <- function(xb, detail = "no", vref = 1, b, valneg = 0) {
  # Correction function applied to 1 ion in 1 batch.
  # Uses a linear regression computed on QC-pools to correct intensity values.
  #
  # xb     : dataframe for 1 ion in 1 batch, samples in rows ; columns are used by
  #          position : 1 = sampleType (read by name), 2 = injection order, 3 = intensity
  # detail : "reg" to draw the regression plot on the current graphics device
  # vref   : reference value used to rescale the ion global intensity
  # b      : which batch it is (only used in the plot title)
  # valneg : what to do with generated negative and Inf values : "NA" sets them to NA,
  #          any other value replaces them by a correction based on the batch average
  #
  # Returns a list : norm.ion = corrected intensities (one per row of xb),
  #                  norm.ind = 1 if the linear correction was applied, 0 otherwise.
  # Rows that are neither "pool" nor "sample" keep their initial value.
  indpb <- which(xb$sampleType == "pool")    # pools subscripts of current batch
  indsp <- which(xb$sampleType == "sample")  # samples subscripts of current batch
  indbt <- which(xb$sampleType == "sample" | xb$sampleType == "pool")  # pools + samples
  labion <- dimnames(xb)[[2]][3]
  newval <- xb[[3]]  # initialisation of corrected values = initial values
  ind <- 0           # initialisation of correction indicator
  normTodo <- ok_norm(xb[indpb, 3], xb[indpb, 2], xb[indsp, 3], xb[indsp, 2], method = "linear")
  if (normTodo == 0) {
    ind <- 1
    reslsfit <- lsfit(xb[indpb, 2], xb[indpb, 3])  # linear regression for QC-pools
    ordori <- reslsfit$coefficients[1]
    pente <- reslsfit$coefficients[2]
    if (detail == "reg") {
      reslsfitSample <- lsfit(xb[indsp, 2], xb[indsp, 3])  # samples' line, only drawn
      liminf <- min(xb[indbt, 3])
      limsup <- max(xb[indbt, 3])
      plot(xb[indsp, 2], xb[indsp, 3], pch = 16,
           main = paste(labion,"batch ",b), ylab = "intensity", xlab = "injection order",
           ylim = c(liminf, limsup))
      points(xb[indpb, 2], xb[indpb, 3], pch = 5)
      abline(reslsfit)
      abline(reslsfitSample, lty = 2)
    }
    # correction with rescaling of ion global intensity (vref), computed on the
    # pool and sample rows only and written back at their own subscripts so that
    # the result stays aligned with the rows of xb
    raw <- xb[indbt, 3]
    denom <- pente * xb[indbt, 2] + ordori
    corrected <- (vref * raw) / denom
    corrected[which(denom < 1)] <- -1  # flag cases where denominator < 1
    # handling of negative values (or null denominators)
    toajust <- which((corrected == Inf) | (corrected < 0))
    if (length(toajust) != 0) {
      if (valneg == "NA") {
        corrected[toajust] <- NA
      } else {
        corrected[toajust] <- vref * raw[toajust] / mean(raw)
      }
    }
    newval[indbt] <- corrected
  } else {
    # if ok_norm != 0, we perform a correction based on batch samples average
    moySample <- mean(xb[indsp, 3])
    if (moySample == 0) moySample <- 1
    newval[indsp] <- (vref * xb[indsp, 3]) / moySample
    if (length(indpb) > 0) {
      moypool <- mean(xb[indpb, 3])
      if (moypool == 0) moypool <- 1
      newval[indpb] <- (vref * xb[indpb, 3]) / moypool
    }
  }
  list(norm.ion = newval, norm.ind = ind)
}
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
243
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
244
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
normloess <- function(xb, detail = "no", vref = 1, b, span = NULL) {
  # Correct 1 ion in 1 batch with a loess curve fitted on the QC-pools.
  #
  # xb     : dataframe for 1 ion in 1 batch, samples in rows ; columns are used by
  #          position : 1 = sampleType (read by name), 2 = injection order, 3 = intensity
  # detail : "reg" to draw the regression plot when the loess correction is kept
  # vref   : reference value (average of ion)
  # b      : batch subscript (only used in the plot title)
  # span   : loess span ; NULL means 1
  #
  # Returns a list : norm.ion = corrected intensities,
  #                  norm.ind = 1 if the loess correction was kept, 0 otherwise.
  is_pool <- xb$sampleType == "pool"
  is_sample <- xb$sampleType == "sample"
  indpb <- which(is_pool)              # pools of the current batch
  indsp <- which(is_sample)            # samples of the current batch
  indbt <- which(is_sample | is_pool)  # pools and samples together
  labion <- dimnames(xb)[[2]][3]
  newval <- xb[[3]]                    # start from the initial values
  ind <- 0                             # correction indicator
  normTodo <- ok_norm(xb[indpb, 3], xb[indpb, 2], xb[indsp, 3], xb[indsp, 2], method = "loess")

  if (normTodo == 0) {
    span1 <- if (length(span) == 0) 1 else span
    # loess regression on the QC-pools
    resloess <- loess(xb[indpb, 3] ~ xb[indpb, 2], span = span1, degree = 2,
                      family = "gaussian", iterations = 4, surface = "direct")
    fit <- predict(resloess, newdata = xb[, 2])
    fit[fit <= 1] <- 1                 # never divide by less than 1
    newval <- (vref * xb[, 3]) / fit
    # a corrected value above 3 times the third quartile is taken as an outlier
    # created by the correction : the initial values are then restored
    q3 <- quantile(newval)[4]
    created_outlier <- length(which(newval > 3 * q3)) > 0
    if (created_outlier) {
      newval <- xb[, 3]
    } else {
      ind <- 1                         # correction confirmed
    }
    if (detail == "reg" && ind == 1) {
      liminf <- min(xb[indbt, 3])
      limsup <- max(xb[indbt, 3])
      plot(xb[indsp, 2], xb[indsp, 3], pch = 16, main = paste(labion,"batch ",b),
           ylab = "intensity", xlab = "injection order", ylim = c(liminf, limsup))
      points(xb[indpb, 2], xb[indpb, 3], pch = 5)
      points(cbind(resloess$x, resloess$fitted)[order(resloess$x), ], type = "l", col = "red")
    }
  }

  if (ind == 0) {
    # ok_norm != 0 or outliers created : fall back on the batch averages
    moySample <- mean(xb[indsp, 3])
    if (moySample == 0) moySample <- 1
    newval[indsp] <- (vref * xb[indsp, 3]) / moySample
    if (length(indpb) > 0) {
      moypool <- mean(xb[indpb, 3])
      if (moypool == 0) moypool <- 1
      newval[indpb] <- (vref * xb[indpb, 3]) / moypool
    }
  }

  list(norm.ion = newval, norm.ind = ind)
}
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
286
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
287
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
288
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
norm_QCpool <- function (x, nbid, outlog, fact, metaion, detail="no", NormMoyPool=FALSE, NormInt=FALSE, method="linear", span="none", valNull="0")
{
  ### Batch correction of every ion of a data frame, batch by batch, using the
  ### linear, loess or lowess correction function, with diagnostic plots
  ### written to a PDF file.
  # x: dataframe with samples in rows; the first 'nbid' columns are sample
  #    metadata, the remaining columns are ion intensities
  # nbid: number of sample description columns (id and factors) with at least
  #    "batch", "injectionOrder", "sampleType"
  # outlog: name of the PDF file receiving regression plots, PCA and CV boxplots
  # fact: name of the factor used as categorical variable for plots
  # metaion: dataframe of ions' metadata (one row per ion)
  # detail: level of detail in the outlog file; any other value is treated as "no"
  #    "no"   PCA + boxplot of pool CV before and after correction
  #    "plot" adds, per ion, plots before and after correction
  #    "reg"  same as "plot"; the value is also forwarded to the correction
  #           functions (which use it to add regression plots)
  # NormMoyPool: not used
  # NormInt: not used
  # method: regression method used to correct: "linear", "lowess" or "loess"
  # span: forwarded to the loess / lowess correction functions
  # valNull: replacement (converted with as.numeric) for negative corrected values
  # Returns a list with:
  #    dataMatrix        corrected intensities, ions in rows, first column "ions"
  #    variableMetadata  'metaion' plus one norm.b<k> indicator column per batch
  #    sampleMetadata    the first 'nbid' columns of 'x'

  # Reject an invalid method before the PDF device is opened; the check used to
  # sit inside the loop, which left an open device and an empty file behind.
  if (!(length(method) == 1 && method %in% c("linear", "loess", "lowess"))) {
    stop("\n--\nNo valid 'method' argument supplied.\nMust be 'linear','loess' or 'lowess'.\n--\n")
  }

  col_names  <- dimnames(x)[[2]]
  indfact    <- which(col_names == fact)
  indtypsamp <- which(col_names == "sampleType")
  indbatch   <- which(col_names == "batch")
  indinject  <- which(col_names == "injectionOrder")
  lastIon    <- dim(x)[2]
  # reference value for each ion, used to keep the same rough size of values
  valref <- apply(as.matrix(x[, (nbid + 1):lastIon]), 2, mean)
  nbi <- lastIon - nbid                                   # number of ions
  # as.factor() leaves a factor untouched (same levels as before) and lets a
  # character/numeric batch column be enumerated instead of yielding 0 batches.
  batch_fac    <- as.factor(x$batch)
  batch_levels <- levels(batch_fac)
  nbb <- length(batch_levels)                             # number of batch(es), can be 1
  nbs <- length(x$sampleType[x$sampleType == "sample"])   # number of samples
  nbp <- length(x$sampleType[x$sampleType == "pool"])     # number of QC-pools
  # corrected dataframe, same layout as 'x', intensities initialised to 0
  Xn <- data.frame(x[, c(1:nbid)], matrix(0, nrow = nbp + nbs, ncol = nbi))
  dimnames(Xn) <- dimnames(x)
  # pool CV of each ion before ("avant") and after ("apres") correction
  cv <- data.frame(matrix(0, nrow = nbi, ncol = 2))
  dimnames(cv)[[2]] <- c("avant", "apres")
  if (!isTRUE(detail %in% c("reg", "plot", "no"))) {
    detail <- "no"
  }

  pdf(outlog, width = 27, height = 20)
  pdf_dev <- dev.cur()
  # Close the PDF device on every exit path, including errors raised below.
  on.exit(if (pdf_dev %in% dev.list()) dev.off(which = pdf_dev), add = TRUE)

  cat(nbi, " ions ", nbb, " batch(es) \n")
  grid_layout <- if (nbb < 6) c(3, 3) else c(4, 4)
  if (detail == "plot") {
    par(mfrow = grid_layout, ask = FALSE, cex = 1.5)
  }
  # res.ind[p, b]: second element returned by the correction function for ion p
  # in batch b (normalisation indicator)
  res.ind <- matrix(NA, ncol = nbb, nrow = nbi,
                    dimnames = list(col_names[-c(1:nbid)], sprintf("norm.b%d", seq_len(nbb))))
  indpool <- which(x$sampleType == "pool")                # QC-pools subscripts in all batches

  # NOTE(review): the visible part of normloess uses a variable named 'labion'
  # in a plot title; a local of this function is not visible to it under
  # lexical scoping - confirm where the helper gets that value from.
  for (p in seq_len(nbi)) {                               # for each ion
    ion_col <- p + nbid
    labion  <- col_names[ion_col]
    if (detail == "reg") {
      par(mfrow = grid_layout, ask = FALSE, cex = 1.5)
    }
    pools1 <- x[indpool, ion_col]
    cv[p, 1] <- sd(pools1) / mean(pools1)                 # CV before correction

    for (b in seq_len(nbb)) {                             # for every batch
      in_batch <- x$batch == batch_levels[b]
      # subscripts of all pools and samples of the current batch
      indbt <- which(in_batch & (x$sampleType == "pool" | x$sampleType == "sample"))
      sub <- data.frame(x[in_batch, c(indtypsamp, indinject, ion_col)])
      batch_res <- switch(method,
        linear = normlinear(sub, detail, valref[p], b, valNull),
        loess  = normloess(sub, detail, valref[p], b, span),
        lowess = normlowess(sub, detail, valref[p], b, span)
      )
      Xn[indbt, ion_col] <- batch_res[[1]]
      res.ind[p, b] <- batch_res[[2]]
    }

    # Negative pool values are zeroed before the CV is computed; as a result
    # 'valNull' below never applies to pool rows.
    Xn[indpool, ion_col][Xn[indpool, ion_col] < 0] <- 0
    pools2 <- Xn[indpool, ion_col]
    cv[p, 2] <- sd(pools2, na.rm = TRUE) / mean(pools2, na.rm = TRUE)  # CV after correction

    if (detail == "reg" || detail == "plot") {
      # plots before and after correction, sharing the same y range
      ylim_both <- range(x[[ion_col]], Xn[[ion_col]], na.rm = TRUE)
      plot(x$injectionOrder, x[, ion_col], col = batch_fac, ylab = labion, ylim = ylim_both,
           main = paste0("before correction (CV for pools = ", round(cv[p, 1], 2), ")"))
      points(x$injectionOrder[indpool], x[indpool, ion_col], col = "maroon", pch = ".", cex = 2)
      plot(Xn$injectionOrder, Xn[, ion_col], col = batch_fac, ylab = "", ylim = ylim_both,
           main = paste0("after correction (CV for pools = ", round(cv[p, 2], 2), ")"))
      points(Xn$injectionOrder[indpool], Xn[indpool, ion_col], col = "maroon", pch = ".", cex = 2)
      suppressWarnings(plot.design(x[c(indtypsamp, indbatch, indfact, ion_col)], main = "factors effect before correction"))
      suppressWarnings(plot.design(Xn[c(indtypsamp, indbatch, indfact, ion_col)], main = "factors effect after correction"))
    }
  }

  ### Replacement of post correction negative values by chosen value
  null_value <- as.numeric(valNull)
  for (i in (nbid + 1):dim(Xn)[2]) {
    cneg <- which(Xn[[i]] < 0)
    Xn[[i]] <- replace(Xn[[i]], cneg, null_value)
  }

  if (nbi > 3) {
    # PCA before/after, PCA on normalised ions only, then CV boxplots
    par(mfrow = c(3, 4), ask = FALSE, cex = 1.2)
    pca_meta <- c(indtypsamp, indbatch, indtypsamp, indfact)
    ion_cols <- (nbid + 1):lastIon
    acplight(x[, c(pca_meta, ion_cols)], "uv", TRUE)
    # Ions normalised in at least one batch. 'norm_idx' is relative to the ion
    # columns (what acplight uses to colour the loadings); 'norm_cols' are the
    # matching column positions in Xn. Passing Xn positions to acplight shifted
    # the highlighted ions by 'nbid'.
    norm_idx  <- unname(which(rowSums(res.ind) >= 1))
    norm_cols <- nbid + norm_idx
    acplight(Xn[, c(pca_meta, ion_cols)], "uv", TRUE, norm_idx)
    if (length(norm_cols) > 0) {
      acplight(Xn[, c(pca_meta, norm_cols)], "uv", TRUE)
    }
    par(mfrow = c(1, 2), ask = FALSE, cex = 1.2)          # before/after boxplot
    cvplot <- cv[!is.na(cv[[1]]) & !is.na(cv[[2]]), ]
    if (nrow(cvplot) > 0) {
      cv_lim <- c(min(cvplot), max(cvplot))
      boxplot(cvplot[[1]], ylim = cv_lim, main = "CV before correction")
      boxplot(cvplot[[2]], ylim = cv_lim, main = "CV after correction")
    }
  }

  # transposed matrix is returned (format of the initial matrix with ions in rows);
  # drop = FALSE keeps a data frame when there is a single ion
  Xr <- Xn[, -c(1:nbid), drop = FALSE]
  dimnames(Xr)[[1]] <- Xn[[1]]
  Xr <- t(Xr)
  Xr <- data.frame(ions = rownames(Xr), Xr)

  res.norm <- list(
    dataMatrix       = Xr,
    variableMetadata = data.frame(metaion, res.ind),
    sampleMetadata   = x[, c(1:nbid)]
  )
  return(res.norm)
}
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
399
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
400
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
401
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
402
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
403
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
acplight <- function(ids, scaling="uv", indiv=FALSE, indcol=NULL) {
  # Make a PCA and plot scores (and optionally loadings) on the current device.
  # ids: data frame; the first column is skipped, columns 2 to 4 contain the
  #      factors used in turn to colour the three score plots, columns 5 to the
  #      last one contain the ion intensities.
  # scaling: scaling choice forwarded to pcaMethods::prep ("uv", "none", "pareto")
  # indiv: if TRUE, also plot the loadings of the first PCA
  # indcol: optional indices of ions, relative to the ion columns of 'ids'
  #      (column 5 is ion 1), drawn in dark blue on the loadings plot; every
  #      other ion is drawn in red. NULL draws every ion in dark blue.
  # Called for its plots; returns NULL invisibly.
  suppressPackageStartupMessages(library(ade4))
  suppressPackageStartupMessages(library(pcaMethods))
  if (ncol(ids) < 5) {
    stop("'ids' must contain at least one ion column after its first four columns")
  }

  for (i in 1:3) {
    class_col <- i + 1
    # drop rows whose class is missing ("NA" string, real NA) or empty
    idss <- ids[which(ids[, class_col] != "NA"), ]
    idss <- data.frame(idss[idss[, class_col] != "", ])
    classe <- as.factor(idss[[class_col]])
    colour <- seq_along(levels(classe))
    ions <- as.matrix(idss[, 5:dim(idss)[2]])
    # Removing ions containing NA (not compatible with standard PCA);
    # 'kept' remembers the original position of the surviving ions.
    kept <- which(colSums(is.na(ions)) == 0)
    ions <- t(na.omit(t(ions)))
    if (i == 1 && ncol(ions) != (ncol(idss) - 4)) {
      cat("Note:", (ncol(idss) - 4) - ncol(ions), "ions were ignored for PCA display due to NA in intensities.\n")
    }
    # Scaling choice: "uv","none","pareto"
    object <- suppressWarnings(prep(ions, scale = scaling, center = TRUE))
    if (i == 1) {
      n_const <- length(which(apply(ions, 2, var) == 0))
      if (n_const > 0) {
        cat("Warning: there are", n_const, "constant ions.\n")
      }
    }
    # ALGO: nipals,svdImpute, Bayesian, svd, probalistic=F
    result <- pca(object, center = FALSE, method = "svd", nPcs = 2)
    # ADE4 : to plot samples' ellipsoid for each class
    s.class(result@scores, classe, cpoint = 1, xax = 1, yax = 2, col = colour,
            sub = sprintf("Scores - PCs %sx%s", 1, 2), possub = "bottomright")
    if (i == 1) {
      # the loadings plot below is based on the first PCA only
      resulti <- result
      kept_first <- kept
    }
  }

  if (indiv) {
    n_load <- nrow(resulti@loadings)
    colour <- rep("darkblue", n_load)
    if (!is.null(indcol)) {
      # A logical mask is used instead of colour[-indcol]: a negative index
      # built from an empty 'indcol' selects nothing, which left every ion
      # dark blue when no ion at all was flagged.
      ion_pos <- if (length(kept_first) == n_load) kept_first else seq_len(n_load)
      colour[!(ion_pos %in% indcol)] <- "red"
    }
    plot(resulti@loadings, col = colour, main = "Loadings", xaxt = "n", yaxt = "n", pch = 20,
         xlab = bquote(PC1-R^2==.(resulti@R2[1])), ylab = bquote(PC2 - R^2 == .(resulti@R2[2])))
    abline(h = 0, v = 0)
  }
  invisible(NULL)
}
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
436
73892ef177e3 Uploaded
melpetera
parents:
diff changeset
437