comparison nmr_preprocessing/ReadFids.R @ 2:7304ec2c9ab7 draft

Uploaded
author marie-tremblay-metatoul
date Mon, 30 Jul 2018 10:33:03 -0400
parents
children
comparison
equal deleted inserted replaced
1:b55559a2854f 2:7304ec2c9ab7
1 ################################################################################################
2 #
3 # Read FIDs in Bruker format
4 #
5 #
6 ################################################################################################
7
8 # ReadFid ==============================================================================
9 ReadFid <- function(path) {
10
11 # Read 1D FID using Bruker XWinNMR and TopSpin format. It is inspired of the
12 # matNMR matlab library which deals with 2D FID and also other formats
13 # Read also the parameters in the acqus file
14
15 paramFile <- file.path(path, "acqus")
16 # BYTEORDA: 0 -> Little Endian 1 -> Big Endian
17 params <- readParams(paramFile, c("TD", "BYTORDA", "DIGMOD", "DECIM", "DSPFVS",
18 "SW_h", "SW", "O1"))
19
20 if (params[["DSPFVS"]] >= 20) {
21 # The group delay first order phase correction is given directly from version 20
22 grpdly <- readParams(paramFile, c("GRPDLY"))
23 params[["GRPDLY"]] <- grpdly[["GRPDLY"]]
24 }
25 TD <- params[["TD"]]
26 endianness <- if (params$BYTORDA)
27 "big" else "little"
28 if (TD%%2 != 0) {
29 stop(paste("Only even numbers are allowed for size in TD because it is complex
30 data with the real and imaginary part for each element.",
31 "The TD value is in the", paramFile, "file"))
32 }
33
34 # Interpret params Dwell Time, time between 2 data points in the FID
35 params[["DT"]] <- 1/(2 * params[["SW_h"]])
36
37 # Read fid
38 fidFile <- file.path(path, "fid")
39 fidOnDisk <- readBin(fidFile, what = "int", n = TD, size = 4L, endian = endianness)
40
41 # Real size that is on disk (it should be equal to TD2, except for TopSpin/Bruker
42 # (which is our case) according to matNMR as just discussed
43 TDOnDisk <- length(fidOnDisk)
44 if (TDOnDisk < TD) {
45 warning("Size is smaller than expected, the rest is filled with zero so the size is the same for every fid")
46 fidGoodSize <- sapply(vector("list", length = TD), function(x) 0)
47 fidGoodSize[1:TDOnDisk] <- fidOnDisk
48
49 } else if (TDOnDisk > TD) {
50 warning("Size is bigger than expected, the rest ignored so the size is the same for every fid")
51 fidGoodSize <- fidOnDisk(1:TD)
52
53 } else {
54 fidGoodSize <- fidOnDisk
55 }
56
57 fidRePart <- fidGoodSize[seq(from = 1, to = TD, by = 2)]
58 fidImPart <- fidGoodSize[seq(from = 2, to = TD, by = 2)]
59 fid <- complex(real = fidRePart, imaginary = fidImPart)
60
61 return(list(fid = fid, params = params))
62 }
63
64
65
66
67 # getDirsContainingFid ==============================================================================
68 getDirsContainingFid <- function(path) {
69 subdirs <- dir(path, full.names = TRUE)
70 if (length(subdirs) > 0) {
71 cond <- sapply(subdirs, function(x) {
72 content <- dir(x)
73 # subdirs must contain fid, acqu and acqus files
74 return("fid" %in% content && "acqu" %in% content && "acqus" %in% content)
75 })
76 subdirs <- subdirs[cond]
77 }
78 return(subdirs)
79 }
80
81
82
83
84
85
86 # beginTreatment ==============================================================================
87
88 beginTreatment <- function(name, Signal_data = NULL, Signal_info = NULL,
89 force.real = FALSE) {
90
91 cat("Begin", name, "\n")
92
93
94 # Formatting the Signal_data and Signal_info -----------------------
95
96 vec <- is.vector(Signal_data)
97 if (vec) {
98 Signal_data <- vec2mat(Signal_data)
99 }
100 if (is.vector(Signal_info)) {
101 Signal_info <- vec2mat(Signal_info)
102 }
103 if (!is.null(Signal_data)) {
104 if (!is.matrix(Signal_data)) {
105 stop("Signal_data is not a matrix.")
106 }
107 if (!is.complex(Signal_data) && !is.numeric(Signal_data)) {
108 stop("Signal_data contains non-numerical values.")
109 }
110 }
111 if (!is.null(Signal_info) && !is.matrix(Signal_info)) {
112 stop("Signal_info is not a matrix.")
113 }
114
115
116 Original_data <- Signal_data
117
118 # Extract the real part of the spectrum ---------------------------
119
120 if (force.real) {
121 if (is.complex(Signal_data)) {
122 Signal_data <- Re(Signal_data)
123 } else {
124 # The signal is numeric Im(Signal_data) is zero anyway so let's avoid
125 # using complex(real=...,imaginary=0) which would give a complex signal
126 # in endTreatment()
127 force.real <- FALSE
128 }
129 }
130
131
132 # Return the formatted data and metadata entries --------------------
133
134 return(list(start = proc.time(), vec = vec, force.real = force.real,
135 Original_data = Original_data, Signal_data = Signal_data, Signal_info = Signal_info))
136 }
137
138
139 # endTreatment ==============================================================================
140
141 endTreatment <- function(name, begin_info, Signal_data) {
142 end_time = proc.time() # record it as soon as possible
143 start_time = begin_info[["start"]]
144 delta_time = end_time - start_time
145 delta = delta_time[]
146 cat("End", name, "\n")
147 cat("It lasted",
148 round(delta["user.self"], 3), "s user time,",
149 round(delta["sys.self"] , 3), "s system time and",
150 round(delta["elapsed"] , 3), "s elapsed time.\n")
151 if (begin_info[["force.real"]]) {
152 # The imaginary part is left untouched
153 i <- complex(real=0, imaginary=1)
154 Signal_data = Signal_data + i * Im(begin_info[["Original_data"]])
155 }
156 if (begin_info[["vec"]]) {
157 Signal_data = Signal_data[1,]
158 }
159 return(Signal_data)
160 }
161
162
163 # checkArg ==============================================================================
164
165 checkArg <- function(arg, checks, can.be.null=FALSE) {
166 check.list <- list(bool=c(is.logical, "a boolean"),
167 int =c(function(x){x%%1==0}, "an integer"),
168 num =c(is.numeric, "a numeric"),
169 str =c(is.character, "a string"),
170 pos =c(function(x){x>0}, "positive"),
171 pos0=c(function(x){x>=0}, "positive or zero"),
172 l1 =c(function(x){length(x)==1}, "of length 1")
173 )
174 if (is.null(arg)) {
175 if (!can.be.null) {
176 stop(deparse(substitute(arg)), " is null.")
177 }
178 } else {
179 if (is.matrix(arg)) {
180 stop(deparse(substitute(arg)), " is not scalar.")
181 }
182 for (c in checks) {
183 if (!check.list[[c]][[1]](arg)) {
184 stop(deparse(substitute(arg)), " is not ", check.list[[c]][[2]], ".")
185 }
186 }
187 }
188 }
189
190
191 # getArg ==============================================================================
192
193 getArg <- function(arg, info, argname, can.be.absent=FALSE) {
194 if (is.null(arg)) {
195 start <- paste("impossible to get argument", argname, "it was not given directly and");
196 if (!is.matrix(info)) {
197 stop(paste(start, "the info matrix was not given"))
198 }
199 if (!(argname %in% colnames(info))) {
200 if (can.be.absent) {
201 return(NULL)
202 } else {
203 stop(paste(start, "is not in the info matrix"))
204 }
205 }
206 if (nrow(info) < 1) {
207 stop(paste(start, "the info matrix has no row"))
208 }
209 arg <- info[1,argname]
210 if (is.na(arg)) {
211 stop(paste(start, "it is NA in the info matrix"))
212 }
213 }
214 return(arg)
215 }
216
217
218
219 # getTitle ==============================================================================
220
221 # Get the name of the signal from the title file or fromt the name of the subdirectory
222 # Get the name of the signal from the title file or fromt the name of the subdirectory
223
224 getTitle <- function(path, l, subdirs) {
225 title <- NULL
226 title_file <- file.path(file.path(file.path(path, "pdata"), "1"), "title")
227 if (file.exists(title_file)) {
228 lines <- readLines(title_file, warn = FALSE)
229 if (length(lines) >= 1) {
230 first_line <- gsub("^\\s+|\\s+$", "", lines[l])
231 if (nchar(first_line) >= 1) {
232 title <- first_line
233 } else {
234 warning(paste("The first line of the title file is blank for directory ",
235 path))
236 }
237 } else {
238 warning(paste("The title file is empty for directory ", path))
239 }
240 } else {
241 warning(paste("Title file doesn't exists for directory ", path, "\n the repertory name is used instead"))
242 }
243 if (is.null(title)) {
244 if(subdirs) {
245 separator <- .Platform$file.sep
246 path_elem <- strsplit(path,separator)[[1]]
247 title <- paste(path_elem[length(path_elem)-1], path_elem[length(path_elem)], sep = "_")
248 } else{title <- basename(path)}
249 }
250 return(title)
251 }
252
253
254
255 # readParams ==============================================================================
256 # Read parameter values for Fid_info in the ReadFids function
257
258 readParams <- function(file, paramsName) {
259
260 isDigit <- function(c) {
261 return(suppressWarnings(!is.na(as.numeric(c))))
262 }
263 lines <- readLines(file)
264 params <- sapply(paramsName, function(x) NULL)
265
266 for (paramName in paramsName) {
267 # Find the line with the parameter I add a '$' '=' in the pattern so that for
268 # example 'TD0' is not found where I look for 'TD' and LOCSW and WBSW when I look
269 # for 'SW'
270 pattern <- paste("\\$", paramName, "=", sep = "")
271 occurences <- grep(pattern, lines)
272 if (length(occurences) == 0L) {
273 stop(paste(file, "has no field", pattern))
274 }
275 if (length(occurences) > 1L) {
276 warning(paste(file, "has more that one field", pattern, " I take the first one"))
277 }
278 line <- lines[occurences[1]]
279
280 # Cut beginning and end of the line '##$TD= 65536' -> '65536'
281 igual = as.numeric(regexpr("=", line))
282
283 first <- igual
284 while (first <= nchar(line) & !isDigit(substr(line, first, first))) {
285 first <- first + 1
286 }
287 last <- nchar(line)
288 while (last > 0 & !isDigit(substr(line, last, last))) {
289 last <- last - 1
290 }
291 params[paramName] <- as.numeric(substr(line, first, last))
292 }
293 return(params)
294 }
295
296
297
298 # ReadFids ==============================================================================
299 ReadFids <- function(path, l = 1, subdirs = FALSE) {
300
301 # Data initialisation and checks ----------------------------------------------
302 begin_info <- beginTreatment("ReadFids")
303 checkArg(path, c("str"))
304 checkArg(l, c("pos"))
305 if (file.exists(path) == FALSE) {
306 stop(paste("Invalid path:", path))
307 }
308
309
310 # Extract the FIDs and their info ----------------------------------------------
311
312 if (subdirs == FALSE) {
313 fidDirs <- getDirsContainingFid(path)
314 n <- length(fidDirs)
315 if (n == 0L) {
316 stop(paste("No valid fid in", path))
317 }
318 fidNames <- sapply(X = fidDirs, FUN = getTitle, l = l, subdirs = subdirs, USE.NAMES = F)
319 for (i in 1:n) {
320 fidList <- ReadFid(fidDirs[i])
321 fid <- fidList[["fid"]]
322 info <- fidList[["params"]]
323 m <- length(fid)
324 if (i == 1) {
325 Fid_data <- matrix(nrow = n, ncol = m, dimnames = list(fidNames,
326 info[["DT"]] * (0:(m - 1))))
327 Fid_info <- matrix(nrow = n, ncol = length(info), dimnames = list(fidNames,
328 names(info)))
329 }
330 Fid_data[i, ] <- fid
331 Fid_info[i, ] <- unlist(info)
332 }
333
334 } else {
335 maindirs <- dir(path, full.names = TRUE)
336 Fid_data <- numeric()
337 Fid_info <- numeric()
338
339 fidDirs <- c()
340 for (j in maindirs) {
341 fd <- getDirsContainingFid(j)
342 n <- length(fd)
343 if (n > 0L) {
344 fidDirs <- c(fidDirs, fd)
345 } else {warning(paste("No valid fid in",j ))}
346 }
347
348 fidNames <- sapply(X = fidDirs, FUN = getTitle, l = l, subdirs = subdirs, USE.NAMES = F)
349 for (i in 1:length(fidNames)) {
350 fidList <- ReadFid(fidDirs[i])
351 fid <- fidList[["fid"]]
352 info <- fidList[["params"]]
353 m <- length(fid)
354 if (i == 1) {
355 Fid_data <- matrix(nrow = length(fidNames), ncol = m, dimnames = list(fidNames,
356 info[["DT"]] * (0:(m - 1))))
357 Fid_info <- matrix(nrow = length(fidNames), ncol = length(info), dimnames = list(fidNames,
358 names(info)))
359 }
360 Fid_data[i, ] <- fid
361 Fid_info[i, ] <- unlist(info)
362 }
363
364
365 }
366
367 # Check for non-unique IDs ----------------------------------------------
368 NonnuniqueIds <- sum(duplicated(row.names(Fid_data)))
369 cat("dim Fid_data: ", dim(Fid_data), "\n")
370 cat("IDs: ", rownames(Fid_data), "\n")
371 cat("non-unique IDs?", NonnuniqueIds, "\n")
372 if (NonnuniqueIds > 0) {
373 warning("There are duplicated IDs: ", Fid_data[duplicated(Fid_data)])
374 }
375
376
377 # Return the results ----------------------------------------------
378 return(list(Fid_data = endTreatment("ReadFids", begin_info, Fid_data), Fid_info = Fid_info))
379
380 }
381
382