Mercurial > repos > lain > ms2snoop
comparison moulinetteJF/retraitement_MSpurity_V4.R @ 0:67733206be53 draft
" master branch Updating"
author | lain |
---|---|
date | Thu, 14 Apr 2022 10:23:15 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:67733206be53 |
---|---|
1 ## read and process mspurity W4M files | |
2 ## create a summary of fragment for each precursor and a graphics of peseudo spectra + correlation on which checking of fragment is based on | |
3 ## V3 try to identify and process multiple files for 1 precursor which may occur if different collision energy are used | |
4 ## V4 elimination of correlation = NA. Correlation is done with precursor, if precursor is not present correlation with most intense peak | |
5 | |
6 ##require(msPurity) | |
7 | |
8 ########################################################################################### | |
9 | |
10 plot_pseudoSpectra <- function(x, fid, sumInt, vmz, corAbsInt, refcol, cNAME) { | |
11 ## function to compute sum of intensities among scans for all m/z kept (cor > r_threshold & minimum number of scans) | |
12 ## and plot pseudo spectra | |
13 ## x dataframe scan X fragments with scans number in the 1st column and ions in next with intensities | |
14 ## fid file id when several a precursor has been detected in several files | |
15 # du fait de la difference de nombre de colonne entre la dataframe qui inclue les scans en 1ere col, mzRef se decale de 1 | |
16 refcol <- refcol-1 | |
17 ## compute relative intensities max=100% | |
18 relInt <- sumInt[-1] | |
19 relInt <- relInt/max(relInt) | |
20 | |
21 ## define max value on vertical axis (need to increase in order to plot the label of fragments) | |
22 ymax <- max(relInt)+0.2*max(relInt) | |
23 | |
24 par(mfrow=c(2,1)) | |
25 plot(vmz, relInt, type="h", ylim=c(0,ymax), main = cNAME) | |
26 ## low correl coef. will be display in grey | |
27 corLow <- which(round(corAbsInt,2) < r_threshold) | |
28 | |
29 lbmzcor <- paste(as.character(vmz),"(r=",round(corAbsInt,2),")", sep="") | |
30 | |
31 if (length(corLow) > 0) { | |
32 text(vmz[corLow], relInt[corLow], lbmzcor[corLow], cex=0.5, col = "grey", srt = 90 , adj=0) | |
33 if (length(vmz) - length(corLow) > 1) text(vmz[-c(refcol,corLow)], relInt[-c(refcol,corLow)], lbmzcor[-c(refcol,corLow)], cex=0.6, col = 1, srt = 90 , adj=0) | |
34 } else { | |
35 if (length(vmz) > 1) text(vmz[-c(refcol)], relInt[-c(refcol)], lbmzcor[-c(refcol)], cex=0.6, col = 1, srt = 90 , adj=0) | |
36 } | |
37 | |
38 text(vmz[refcol], relInt[refcol], lbmzcor[refcol], cex=0.8, col = 2, srt = 90, adj=0 ) | |
39 | |
40 ## prepare result file | |
41 corValid <- (round(corAbsInt,2) >= r_threshold) | |
42 cpRes <- data.frame(rep(cNAME, length(vmz)), | |
43 rep(fid, length(vmz)), | |
44 vmz,corAbsInt,sumInt[-1],relInt,corValid) | |
45 | |
46 colnames(cpRes) <- c("compoundName","fileid","fragments_mz","CorWithPrecursor","AbsoluteIntensity","relativeIntensity","corValid") | |
47 return(cpRes) | |
48 | |
49 } | |
50 | |
51 ## function for extraction of fragments corresponding to precursors detected by MSPurity | |
52 Xtract_fragments <- function(mzref, rtref, cNAME, r_threshold = 0.85, seuil_ra = 0.1, tolmz = 0.01, tolrt = 60) { | |
53 | |
54 ## filter precursor in the precursors file based on mz and rt in the compound list | |
55 cat("processing ",cNAME,"\n") | |
56 selprec <- which((abs(prec$precurMtchMZ - mzref) <= tolmz) & (abs(prec$precurMtchRT - rtref) <= tolrt)) | |
57 | |
58 ## check if there is the precursor in the file | |
59 if (length(selprec) > 0) { | |
60 | |
61 sprecini <- prec[selprec,] | |
62 | |
63 ## check if fragments corresponding to precursor are found in several files (collision energy) | |
64 ## this lead to a processing for each fileid | |
65 mf <- levels(as.factor(sprecini$fileid)) | |
66 nbf <- length(mf) | |
67 if (nbf > 1) cat(" several files detected for this compounds :\n") | |
68 | |
69 for (f in 1:nbf) { | |
70 | |
71 sprec <- sprecini[sprecini$fileid == mf[f],] | |
72 | |
73 ## selection of fragment in the fragments file with the grpid common in both fragments and precursors | |
74 selfrgt <- levels(as.factor(sprec$grpid)) | |
75 sfrgt <- frgt[frgt$grpid %in% selfrgt & frgt$fileid == mf[f],] | |
76 | |
77 ## filter fragments on relative intensity seuil_ra = user defined parameter (MSpurity flags could be used here) | |
78 sfrgtfil <- sfrgt[sfrgt$ra > seuil_ra,] | |
79 | |
80 mznominal <- round(x = sfrgtfil$mz, mzdecimal) | |
81 sfrgtfil <- data.frame(sfrgtfil, mznominal) | |
82 | |
83 ## creation of cross table row=scan col=mz X=ra | |
84 vmz <- levels(as.factor(sfrgtfil$mznominal)) | |
85 #fscan <- levels(as.factor(sfrgtfil$acquisitionNum)) | |
86 | |
87 cat(" fragments :",vmz) | |
88 # dsIntra <- matrix(NA,nrow = length(vscan), ncol = length(vmz)) | |
89 # rownames(dsIntra) <- fscan | |
90 # dsIntra <- data.frame(fscan,dsIntra) | |
91 # colnames(dsIntra) <- c("fscan",vmz) | |
92 | |
93 ## mz of precursor in data precursor to check correlation with | |
94 mzPrec <- paste("mz",round(mean(sprec$mz),mzdecimal),sep="") | |
95 | |
96 for (m in 1:length(vmz)) { | |
97 | |
98 ## absolute intensity | |
99 cln <- c(which(colnames(sfrgtfil) == "acquisitionNum"), which(colnames(sfrgtfil) == "i")) | |
100 int_mz <- sfrgtfil[sfrgtfil$mznominal == vmz[m], cln] | |
101 colnames(int_mz)[2] <- paste("mz", vmz[m], sep="") | |
102 | |
103 ## average intensities of mass in duplicate scans | |
104 compScans <- aggregate(x = int_mz, by = list(int_mz[[1]]), FUN = mean) | |
105 int_mz <- compScans[,-1] | |
106 | |
107 if (m == 1) { | |
108 dsAbsInt <- int_mz | |
109 #dsRelInt <- ra_mz | |
110 } else { | |
111 dsAbsInt <- merge(x = dsAbsInt, y = int_mz, by.x = 1, by.y = 1, all.x=TRUE, all.y=TRUE) | |
112 #dsRelInt <- merge(x = dsRelInt, y = ra_mz, by.x = 1, by.y = 1, all.x=TRUE, all.y=TRUE) | |
113 | |
114 } | |
115 } | |
116 ## for debug | |
117 ## write.table(x = dsAbsInt,file=paste(cNAME,"dsAbsInt.txt",sep=""), row.names = FALSE, sep="\t") | |
118 | |
119 ## elimination of mz with less than minNumberScan scans (user defined parameter) | |
120 xmz <- rep(NA,ncol(dsAbsInt)-1) | |
121 sumInt <- rep(NA,ncol(dsAbsInt)) | |
122 nbxmz <- 0 | |
123 NbScanCheck <- min(nrow(dsAbsInt),minNumberScan) | |
124 | |
125 for (j in 2:ncol(dsAbsInt)) { | |
126 sumInt[j] <- sum(dsAbsInt[j],na.rm = TRUE) | |
127 if (sum(!is.na(dsAbsInt[[j]])) < NbScanCheck) { | |
128 nbxmz <- nbxmz + 1 | |
129 xmz[nbxmz] <- j | |
130 } | |
131 } | |
132 | |
133 xmz <- xmz[-which(is.na(xmz))] | |
134 if (length(xmz) > 0) { | |
135 dsAbsInt <- dsAbsInt[,-c(xmz)] | |
136 sumInt <- sumInt[-c(xmz)] | |
137 ## liste des mz keeped decale de 1 avec dsAbsInt | |
138 vmz <- as.numeric(vmz[-c(xmz-1)]) | |
139 } | |
140 | |
141 ## reference ion for correlation computing = precursor OR maximum intensity ion in precursor is not present | |
142 refcol <- which(colnames(dsAbsInt) == mzPrec) | |
143 if (length(refcol) == 0) refcol <- which(sumInt == max(sumInt, na.rm = TRUE)) | |
144 | |
145 pdf(file=paste(cNAME,"_processing_file",mf[f],".pdf",sep=""), width = 8, height = 11 ); par(mfrow=c(3,2)) | |
146 | |
147 ## pearson correlations between absolute intensities computing | |
148 corAbsInt <- rep(NA,length(vmz)) | |
149 | |
150 if (length(refcol) > 0) { | |
151 for (i in 2:length(dsAbsInt)) { | |
152 corAbsInt[i-1] <- cor(x = dsAbsInt[[refcol]], y=dsAbsInt[[i]], use = "pairwise.complete.obs", method = "pearson") | |
153 plot(dsAbsInt[[refcol]],dsAbsInt[[i]], | |
154 xlab = colnames(dsAbsInt)[refcol], ylab=colnames(dsAbsInt)[i], | |
155 main=paste(cNAME," corr coeff r=",round(corAbsInt[i-1],2),sep="")) | |
156 } | |
157 ## plot pseudo spectra | |
158 resCompByfile <- plot_pseudoSpectra(x= dsAbsInt, fid = mf[f], sumInt = sumInt, vmz = vmz, corAbsInt = corAbsInt, refcol = refcol, cNAME = cNAME ) | |
159 if (f == 1) resComp <- resCompByfile | |
160 } else { | |
161 resCompByfile <- NULL | |
162 cat(" non detected in fragments file \n") | |
163 } | |
164 if (!is.null(resCompByfile)) resComp <- rbind(resComp,resCompByfile) | |
165 cat("\n") | |
166 dev.off() | |
167 } | |
168 } else { | |
169 resComp <- NULL | |
170 cat(" non detected in precursor file \n") | |
171 } | |
172 | |
173 return(resComp) | |
174 } | |
175 | |
176 | |
177 #################################### start ####################################################### | |
178 | |
179 ## FOLDER AND FILES | |
180 setwd(dir = "C:/Users/jfmartin/Documents/PROJETS/PROG/tools_R/MSPurity/positif2/V2") | |
181 | |
182 #load("Galaxy541-[msPurity.filterFragSpectra_on_data_540__RData].rdata") | |
183 | |
184 ## MSpurity precursors file | |
185 prec <- read.table(file = "msPurity.filterFragSpectra_on_data_60__peaklist_precursors.tsv", | |
186 header = TRUE, sep="\t") | |
187 | |
188 ## MSpurity fragments file | |
189 frgt <- read.table(file = "msPurity.filterFragSpectra_on_data_60__peaklist_fragments.tsv", | |
190 header = TRUE, sep="\t") | |
191 | |
192 ## list of compounds : col1=Name of molecule, col2=m/z, col3=retention time | |
193 compounds <- read.table(file = "compounds_pos.txt", sep="\t", header = TRUE) | |
194 | |
195 ## PARAMETERS | |
196 ## tolerance for mz(dalton) rt(seconds) to match the standard in the compounds list with the precursor MSpurity file | |
197 tolmz <- 0.01 | |
198 tolrt <- 20 | |
199 | |
200 ## relative intensity threshold | |
201 seuil_ra = 0.05 | |
202 ## nb decimal for mz | |
203 mzdecimal <- 0 | |
204 ## r pearson correlation threshold between precursor and fragment absolute intensity | |
205 r_threshold <- 0.85 | |
206 ## fragments are kept if there are found in a minimum number of scans | |
207 minNumberScan <- 8 | |
208 | |
209 for (i in 1:nrow(compounds)) { | |
210 ## loop execution for all compounds in the compounds file | |
211 resCor <- NULL | |
212 resCor <- Xtract_fragments(mzref = compounds[[2]][i], | |
213 rtref = compounds[[3]][i] * 60, | |
214 cNAME = compounds[[1]][i], | |
215 r_threshold, seuil_ra, tolmz, tolrt) | |
216 if (i == 1 & !is.null(resCor)) resAll <- resCor else if (!is.null(resCor)) resAll <- rbind(resAll,resCor) | |
217 | |
218 } | |
219 | |
220 write.table(x = resAll, file = "compound_fragments_result.txt", sep="\t", row.names = FALSE) | |
221 | |
222 ######################################## end call ################################################ | |
223 |