0
|
1 ## read and process mspurity W4M files
|
|
2 ## create a summary of fragment for each precursor and a graphics of peseudo spectra + correlation on which checking of fragment is based on
|
|
3 ## V3 try to identify and process multiple files for 1 precursor which may occur if different collision energy are used
|
|
4 ## V4 elimination of correlation = NA. Correlation is done with precursor, if precursor is not present correlation with most intense peak
|
|
5
|
|
6 ##require(msPurity)
|
|
7
|
|
8 ###########################################################################################
|
|
9
|
|
10 plot_pseudoSpectra <- function(x, fid, sumInt, vmz, corAbsInt, refcol, cNAME) {
|
|
11 ## function to compute sum of intensities among scans for all m/z kept (cor > r_threshold & minimum number of scans)
|
|
12 ## and plot pseudo spectra
|
|
13 ## x dataframe scan X fragments with scans number in the 1st column and ions in next with intensities
|
|
14 ## fid file id when several a precursor has been detected in several files
|
|
15 # du fait de la difference de nombre de colonne entre la dataframe qui inclue les scans en 1ere col, mzRef se decale de 1
|
|
16 refcol <- refcol-1
|
|
17 ## compute relative intensities max=100%
|
|
18 relInt <- sumInt[-1]
|
|
19 relInt <- relInt/max(relInt)
|
|
20
|
|
21 ## define max value on vertical axis (need to increase in order to plot the label of fragments)
|
|
22 ymax <- max(relInt)+0.2*max(relInt)
|
|
23
|
|
24 par(mfrow=c(2,1))
|
|
25 plot(vmz, relInt, type="h", ylim=c(0,ymax), main = cNAME)
|
|
26 ## low correl coef. will be display in grey
|
|
27 corLow <- which(round(corAbsInt,2) < r_threshold)
|
|
28
|
|
29 lbmzcor <- paste(as.character(vmz),"(r=",round(corAbsInt,2),")", sep="")
|
|
30
|
|
31 if (length(corLow) > 0) {
|
|
32 text(vmz[corLow], relInt[corLow], lbmzcor[corLow], cex=0.5, col = "grey", srt = 90 , adj=0)
|
|
33 if (length(vmz) - length(corLow) > 1) text(vmz[-c(refcol,corLow)], relInt[-c(refcol,corLow)], lbmzcor[-c(refcol,corLow)], cex=0.6, col = 1, srt = 90 , adj=0)
|
|
34 } else {
|
|
35 if (length(vmz) > 1) text(vmz[-c(refcol)], relInt[-c(refcol)], lbmzcor[-c(refcol)], cex=0.6, col = 1, srt = 90 , adj=0)
|
|
36 }
|
|
37
|
|
38 text(vmz[refcol], relInt[refcol], lbmzcor[refcol], cex=0.8, col = 2, srt = 90, adj=0 )
|
|
39
|
|
40 ## prepare result file
|
|
41 corValid <- (round(corAbsInt,2) >= r_threshold)
|
|
42 cpRes <- data.frame(rep(cNAME, length(vmz)),
|
|
43 rep(fid, length(vmz)),
|
|
44 vmz,corAbsInt,sumInt[-1],relInt,corValid)
|
|
45
|
|
46 colnames(cpRes) <- c("compoundName","fileid","fragments_mz","CorWithPrecursor","AbsoluteIntensity","relativeIntensity","corValid")
|
|
47 return(cpRes)
|
|
48
|
|
49 }
|
|
50
|
|
51 ## function for extraction of fragments corresponding to precursors detected by MSPurity
|
|
52 Xtract_fragments <- function(mzref, rtref, cNAME, r_threshold = 0.85, seuil_ra = 0.1, tolmz = 0.01, tolrt = 60) {
|
|
53
|
|
54 ## filter precursor in the precursors file based on mz and rt in the compound list
|
|
55 cat("processing ",cNAME,"\n")
|
|
56 selprec <- which((abs(prec$precurMtchMZ - mzref) <= tolmz) & (abs(prec$precurMtchRT - rtref) <= tolrt))
|
|
57
|
|
58 ## check if there is the precursor in the file
|
|
59 if (length(selprec) > 0) {
|
|
60
|
|
61 sprecini <- prec[selprec,]
|
|
62
|
|
63 ## check if fragments corresponding to precursor are found in several files (collision energy)
|
|
64 ## this lead to a processing for each fileid
|
|
65 mf <- levels(as.factor(sprecini$fileid))
|
|
66 nbf <- length(mf)
|
|
67 if (nbf > 1) cat(" several files detected for this compounds :\n")
|
|
68
|
|
69 for (f in 1:nbf) {
|
|
70
|
|
71 sprec <- sprecini[sprecini$fileid == mf[f],]
|
|
72
|
|
73 ## selection of fragment in the fragments file with the grpid common in both fragments and precursors
|
|
74 selfrgt <- levels(as.factor(sprec$grpid))
|
|
75 sfrgt <- frgt[frgt$grpid %in% selfrgt & frgt$fileid == mf[f],]
|
|
76
|
|
77 ## filter fragments on relative intensity seuil_ra = user defined parameter (MSpurity flags could be used here)
|
|
78 sfrgtfil <- sfrgt[sfrgt$ra > seuil_ra,]
|
|
79
|
|
80 mznominal <- round(x = sfrgtfil$mz, mzdecimal)
|
|
81 sfrgtfil <- data.frame(sfrgtfil, mznominal)
|
|
82
|
|
83 ## creation of cross table row=scan col=mz X=ra
|
|
84 vmz <- levels(as.factor(sfrgtfil$mznominal))
|
|
85 #fscan <- levels(as.factor(sfrgtfil$acquisitionNum))
|
|
86
|
|
87 cat(" fragments :",vmz)
|
|
88 # dsIntra <- matrix(NA,nrow = length(vscan), ncol = length(vmz))
|
|
89 # rownames(dsIntra) <- fscan
|
|
90 # dsIntra <- data.frame(fscan,dsIntra)
|
|
91 # colnames(dsIntra) <- c("fscan",vmz)
|
|
92
|
|
93 ## mz of precursor in data precursor to check correlation with
|
|
94 mzPrec <- paste("mz",round(mean(sprec$mz),mzdecimal),sep="")
|
|
95
|
|
96 for (m in 1:length(vmz)) {
|
|
97
|
|
98 ## absolute intensity
|
|
99 cln <- c(which(colnames(sfrgtfil) == "acquisitionNum"), which(colnames(sfrgtfil) == "i"))
|
|
100 int_mz <- sfrgtfil[sfrgtfil$mznominal == vmz[m], cln]
|
|
101 colnames(int_mz)[2] <- paste("mz", vmz[m], sep="")
|
|
102
|
|
103 ## average intensities of mass in duplicate scans
|
|
104 compScans <- aggregate(x = int_mz, by = list(int_mz[[1]]), FUN = mean)
|
|
105 int_mz <- compScans[,-1]
|
|
106
|
|
107 if (m == 1) {
|
|
108 dsAbsInt <- int_mz
|
|
109 #dsRelInt <- ra_mz
|
|
110 } else {
|
|
111 dsAbsInt <- merge(x = dsAbsInt, y = int_mz, by.x = 1, by.y = 1, all.x=TRUE, all.y=TRUE)
|
|
112 #dsRelInt <- merge(x = dsRelInt, y = ra_mz, by.x = 1, by.y = 1, all.x=TRUE, all.y=TRUE)
|
|
113
|
|
114 }
|
|
115 }
|
|
116 ## for debug
|
|
117 ## write.table(x = dsAbsInt,file=paste(cNAME,"dsAbsInt.txt",sep=""), row.names = FALSE, sep="\t")
|
|
118
|
|
119 ## elimination of mz with less than minNumberScan scans (user defined parameter)
|
|
120 xmz <- rep(NA,ncol(dsAbsInt)-1)
|
|
121 sumInt <- rep(NA,ncol(dsAbsInt))
|
|
122 nbxmz <- 0
|
|
123 NbScanCheck <- min(nrow(dsAbsInt),minNumberScan)
|
|
124
|
|
125 for (j in 2:ncol(dsAbsInt)) {
|
|
126 sumInt[j] <- sum(dsAbsInt[j],na.rm = TRUE)
|
|
127 if (sum(!is.na(dsAbsInt[[j]])) < NbScanCheck) {
|
|
128 nbxmz <- nbxmz + 1
|
|
129 xmz[nbxmz] <- j
|
|
130 }
|
|
131 }
|
|
132
|
|
133 xmz <- xmz[-which(is.na(xmz))]
|
|
134 if (length(xmz) > 0) {
|
|
135 dsAbsInt <- dsAbsInt[,-c(xmz)]
|
|
136 sumInt <- sumInt[-c(xmz)]
|
|
137 ## liste des mz keeped decale de 1 avec dsAbsInt
|
|
138 vmz <- as.numeric(vmz[-c(xmz-1)])
|
|
139 }
|
|
140
|
|
141 ## reference ion for correlation computing = precursor OR maximum intensity ion in precursor is not present
|
|
142 refcol <- which(colnames(dsAbsInt) == mzPrec)
|
|
143 if (length(refcol) == 0) refcol <- which(sumInt == max(sumInt, na.rm = TRUE))
|
|
144
|
|
145 pdf(file=paste(cNAME,"_processing_file",mf[f],".pdf",sep=""), width = 8, height = 11 ); par(mfrow=c(3,2))
|
|
146
|
|
147 ## pearson correlations between absolute intensities computing
|
|
148 corAbsInt <- rep(NA,length(vmz))
|
|
149
|
|
150 if (length(refcol) > 0) {
|
|
151 for (i in 2:length(dsAbsInt)) {
|
|
152 corAbsInt[i-1] <- cor(x = dsAbsInt[[refcol]], y=dsAbsInt[[i]], use = "pairwise.complete.obs", method = "pearson")
|
|
153 plot(dsAbsInt[[refcol]],dsAbsInt[[i]],
|
|
154 xlab = colnames(dsAbsInt)[refcol], ylab=colnames(dsAbsInt)[i],
|
|
155 main=paste(cNAME," corr coeff r=",round(corAbsInt[i-1],2),sep=""))
|
|
156 }
|
|
157 ## plot pseudo spectra
|
|
158 resCompByfile <- plot_pseudoSpectra(x= dsAbsInt, fid = mf[f], sumInt = sumInt, vmz = vmz, corAbsInt = corAbsInt, refcol = refcol, cNAME = cNAME )
|
|
159 if (f == 1) resComp <- resCompByfile
|
|
160 } else {
|
|
161 resCompByfile <- NULL
|
|
162 cat(" non detected in fragments file \n")
|
|
163 }
|
|
164 if (!is.null(resCompByfile)) resComp <- rbind(resComp,resCompByfile)
|
|
165 cat("\n")
|
|
166 dev.off()
|
|
167 }
|
|
168 } else {
|
|
169 resComp <- NULL
|
|
170 cat(" non detected in precursor file \n")
|
|
171 }
|
|
172
|
|
173 return(resComp)
|
|
174 }
|
|
175
|
|
176
|
|
177 #################################### start #######################################################
|
|
178
|
|
179 ## FOLDER AND FILES
|
|
180 setwd(dir = "C:/Users/jfmartin/Documents/PROJETS/PROG/tools_R/MSPurity/positif2/V2")
|
|
181
|
|
182 #load("Galaxy541-[msPurity.filterFragSpectra_on_data_540__RData].rdata")
|
|
183
|
|
184 ## MSpurity precursors file
|
|
185 prec <- read.table(file = "msPurity.filterFragSpectra_on_data_60__peaklist_precursors.tsv",
|
|
186 header = TRUE, sep="\t")
|
|
187
|
|
188 ## MSpurity fragments file
|
|
189 frgt <- read.table(file = "msPurity.filterFragSpectra_on_data_60__peaklist_fragments.tsv",
|
|
190 header = TRUE, sep="\t")
|
|
191
|
|
192 ## list of compounds : col1=Name of molecule, col2=m/z, col3=retention time
|
|
193 compounds <- read.table(file = "compounds_pos.txt", sep="\t", header = TRUE)
|
|
194
|
|
195 ## PARAMETERS
|
|
196 ## tolerance for mz(dalton) rt(seconds) to match the standard in the compounds list with the precursor MSpurity file
|
|
197 tolmz <- 0.01
|
|
198 tolrt <- 20
|
|
199
|
|
200 ## relative intensity threshold
|
|
201 seuil_ra = 0.05
|
|
202 ## nb decimal for mz
|
|
203 mzdecimal <- 0
|
|
204 ## r pearson correlation threshold between precursor and fragment absolute intensity
|
|
205 r_threshold <- 0.85
|
|
206 ## fragments are kept if there are found in a minimum number of scans
|
|
207 minNumberScan <- 8
|
|
208
|
|
209 for (i in 1:nrow(compounds)) {
|
|
210 ## loop execution for all compounds in the compounds file
|
|
211 resCor <- NULL
|
|
212 resCor <- Xtract_fragments(mzref = compounds[[2]][i],
|
|
213 rtref = compounds[[3]][i] * 60,
|
|
214 cNAME = compounds[[1]][i],
|
|
215 r_threshold, seuil_ra, tolmz, tolrt)
|
|
216 if (i == 1 & !is.null(resCor)) resAll <- resCor else if (!is.null(resCor)) resAll <- rbind(resAll,resCor)
|
|
217
|
|
218 }
|
|
219
|
|
220 write.table(x = resAll, file = "compound_fragments_result.txt", sep="\t", row.names = FALSE)
|
|
221
|
|
222 ######################################## end call ################################################
|
|
223
|