comparison nmr_preprocessing/ReadFids_script.R @ 2:7304ec2c9ab7 draft

Uploaded
author marie-tremblay-metatoul
date Mon, 30 Jul 2018 10:33:03 -0400
parents
children
comparison
equal deleted inserted replaced
1:b55559a2854f 2:7304ec2c9ab7
1 ################################################################################################
2 #
3 # Read FIDs in Bruker format
4 #
5 #
6 ################################################################################################
7
8
9 # vec2mat ==============================================================================
10 vec2mat <- function(vec) {
11 return(matrix(vec, nrow = 1, dimnames = list(c(1), names(vec))))
12 }
13
14
15 # ReadFid ==============================================================================
16 ReadFid <- function(path) {
17
18 # Read 1D FID using Bruker XWinNMR and TopSpin format. It is inspired of the
19 # matNMR matlab library which deals with 2D FID and also other formats
20 # Read also the parameters in the acqus file
21
22 paramFile <- file.path(path, "acqus")
23 # BYTEORDA: 0 -> Little Endian 1 -> Big Endian
24 params <- readParams(paramFile, c("TD", "BYTORDA", "DIGMOD", "DECIM", "DSPFVS",
25 "SW_h", "SW", "O1"))
26
27 if (params[["DSPFVS"]] >= 20) {
28 # The group delay first order phase correction is given directly from version 20
29 grpdly <- readParams(paramFile, c("GRPDLY"))
30 params[["GRPDLY"]] <- grpdly[["GRPDLY"]]
31 }
32 TD <- params[["TD"]]
33 endianness <- if (params$BYTORDA)
34 "big" else "little"
35 if (TD%%2 != 0) {
36 stop(paste("Only even numbers are allowed for size in TD because it is complex
37 data with the real and imaginary part for each element.",
38 "The TD value is in the", paramFile, "file"))
39 }
40
41 # Interpret params Dwell Time, time between 2 data points in the FID
42 params[["DT"]] <- 1/(2 * params[["SW_h"]])
43
44 # Read fid
45 fidFile <- file.path(path, "fid")
46 fidOnDisk <- readBin(fidFile, what = "int", n = TD, size = 4L, endian = endianness)
47
48 # Real size that is on disk (it should be equal to TD2, except for TopSpin/Bruker
49 # (which is our case) according to matNMR as just discussed
50 TDOnDisk <- length(fidOnDisk)
51 if (TDOnDisk < TD) {
52 warning("Size is smaller than expected, the rest is filled with zero so the size is the same for every fid")
53 fidGoodSize <- sapply(vector("list", length = TD), function(x) 0)
54 fidGoodSize[1:TDOnDisk] <- fidOnDisk
55
56 } else if (TDOnDisk > TD) {
57 warning("Size is bigger than expected, the rest ignored so the size is the same for every fid")
58 fidGoodSize <- fidOnDisk(1:TD)
59
60 } else {
61 fidGoodSize <- fidOnDisk
62 }
63
64 fidRePart <- fidGoodSize[seq(from = 1, to = TD, by = 2)]
65 fidImPart <- fidGoodSize[seq(from = 2, to = TD, by = 2)]
66 fid <- complex(real = fidRePart, imaginary = fidImPart)
67
68 return(list(fid = fid, params = params))
69 }
70
71
72
73
74 # getDirsContainingFid ==============================================================================
75 getDirsContainingFid <- function(path) {
76 subdirs <- dir(path, full.names = TRUE)
77 if (length(subdirs) > 0) {
78 cond <- sapply(subdirs, function(x) {
79 content <- dir(x)
80 # subdirs must contain fid, acqu and acqus files
81 return("fid" %in% content && "acqu" %in% content && "acqus" %in% content)
82 })
83 subdirs <- subdirs[cond]
84 }
85 return(subdirs)
86 }
87
88
89
90
91
92
93 # beginTreatment ==============================================================================
94
95 beginTreatment <- function(name, Signal_data = NULL, Signal_info = NULL,
96 force.real = FALSE) {
97
98 cat("Begin", name, "\n")
99
100
101 # Formatting the Signal_data and Signal_info -----------------------
102
103 vec <- is.vector(Signal_data)
104 if (vec) {
105 Signal_data <- vec2mat(Signal_data)
106 }
107 if (is.vector(Signal_info)) {
108 Signal_info <- vec2mat(Signal_info)
109 }
110 if (!is.null(Signal_data)) {
111 if (!is.matrix(Signal_data)) {
112 stop("Signal_data is not a matrix.")
113 }
114 if (!is.complex(Signal_data) && !is.numeric(Signal_data)) {
115 stop("Signal_data contains non-numerical values.")
116 }
117 }
118 if (!is.null(Signal_info) && !is.matrix(Signal_info)) {
119 stop("Signal_info is not a matrix.")
120 }
121
122
123 Original_data <- Signal_data
124
125 # Extract the real part of the spectrum ---------------------------
126
127 if (force.real) {
128 if (is.complex(Signal_data)) {
129 Signal_data <- Re(Signal_data)
130 } else {
131 # The signal is numeric Im(Signal_data) is zero anyway so let's avoid
132 # using complex(real=...,imaginary=0) which would give a complex signal
133 # in endTreatment()
134 force.real <- FALSE
135 }
136 }
137
138
139 # Return the formatted data and metadata entries --------------------
140
141 return(list(start = proc.time(), vec = vec, force.real = force.real,
142 Original_data = Original_data, Signal_data = Signal_data, Signal_info = Signal_info))
143 }
144
145
146 # endTreatment ==============================================================================
147
148 endTreatment <- function(name, begin_info, Signal_data) {
149 end_time = proc.time() # record it as soon as possible
150 start_time = begin_info[["start"]]
151 delta_time = end_time - start_time
152 delta = delta_time[]
153 cat("End", name, "\n")
154 cat("It lasted",
155 round(delta["user.self"], 3), "s user time,",
156 round(delta["sys.self"] , 3), "s system time and",
157 round(delta["elapsed"] , 3), "s elapsed time.\n")
158 if (begin_info[["force.real"]]) {
159 # The imaginary part is left untouched
160 i <- complex(real=0, imaginary=1)
161 Signal_data = Signal_data + i * Im(begin_info[["Original_data"]])
162 }
163 if (begin_info[["vec"]]) {
164 Signal_data = Signal_data[1,]
165 }
166 return(Signal_data)
167 }
168
169
170 # checkArg ==============================================================================
171
172 checkArg <- function(arg, checks, can.be.null=FALSE) {
173 check.list <- list(bool=c(is.logical, "a boolean"),
174 int =c(function(x){x%%1==0}, "an integer"),
175 num =c(is.numeric, "a numeric"),
176 str =c(is.character, "a string"),
177 pos =c(function(x){x>0}, "positive"),
178 pos0=c(function(x){x>=0}, "positive or zero"),
179 l1 =c(function(x){length(x)==1}, "of length 1")
180 )
181 if (is.null(arg)) {
182 if (!can.be.null) {
183 stop(deparse(substitute(arg)), " is null.")
184 }
185 } else {
186 if (is.matrix(arg)) {
187 stop(deparse(substitute(arg)), " is not scalar.")
188 }
189 for (c in checks) {
190 if (!check.list[[c]][[1]](arg)) {
191 stop(deparse(substitute(arg)), " is not ", check.list[[c]][[2]], ".")
192 }
193 }
194 }
195 }
196
197
198 # getArg ==============================================================================
199
200 getArg <- function(arg, info, argname, can.be.absent=FALSE) {
201 if (is.null(arg)) {
202 start <- paste("impossible to get argument", argname, "it was not given directly and");
203 if (!is.matrix(info)) {
204 stop(paste(start, "the info matrix was not given"))
205 }
206 if (!(argname %in% colnames(info))) {
207 if (can.be.absent) {
208 return(NULL)
209 } else {
210 stop(paste(start, "is not in the info matrix"))
211 }
212 }
213 if (nrow(info) < 1) {
214 stop(paste(start, "the info matrix has no row"))
215 }
216 arg <- info[1,argname]
217 if (is.na(arg)) {
218 stop(paste(start, "it is NA in the info matrix"))
219 }
220 }
221 return(arg)
222 }
223
224
225
226 # getTitle ==============================================================================
227
228 # Get the name of the signal from the title file or fromt the name of the subdirectory
229 # Get the name of the signal from the title file or fromt the name of the subdirectory
230
231 getTitle <- function(path, l, subdirs) {
232 title <- NULL
233 title_file <- file.path(file.path(file.path(path, "pdata"), "1"), "title")
234 if (file.exists(title_file)) {
235 lines <- readLines(title_file, warn = FALSE)
236 if (length(lines) >= 1) {
237 first_line <- gsub("^\\s+|\\s+$", "", lines[l])
238 if (nchar(first_line) >= 1) {
239 title <- first_line
240 } else {
241 warning(paste("The", l ,"line of the title file is blank for directory ",
242 path, "and the (sub)dirs names are used instead"))
243 }
244 } else {
245 warning(paste("The title file is empty for directory ", path, "and the (sub)dirs names are used instead"))
246 }
247 } else {
248 warning(paste("Title file doesn't exists for directory ", path, "\n the (sub)dirs names are used instead"))
249 }
250 if (is.null(title)) {
251 if(subdirs) {
252 separator <- .Platform$file.sep
253 path_elem <- strsplit(path,separator)[[1]]
254 title <- paste(path_elem[length(path_elem)-1], path_elem[length(path_elem)], sep = "_")
255 } else{title <- basename(path)}
256 }
257 return(title)
258 }
259
260
261
262
263 # readParams ==============================================================================
264 # Read parameter values for Fid_info in the ReadFids function
265
266 readParams <- function(file, paramsName) {
267
268 isDigit <- function(c) {
269 return(suppressWarnings(!is.na(as.numeric(c))))
270 }
271 lines <- readLines(file)
272 params <- sapply(paramsName, function(x) NULL)
273
274 for (paramName in paramsName) {
275 # Find the line with the parameter I add a '$' '=' in the pattern so that for
276 # example 'TD0' is not found where I look for 'TD' and LOCSW and WBSW when I look
277 # for 'SW'
278 pattern <- paste("\\$", paramName, "=", sep = "")
279 occurences <- grep(pattern, lines)
280 if (length(occurences) == 0L) {
281 stop(paste(file, "has no field", pattern))
282 }
283 if (length(occurences) > 1L) {
284 warning(paste(file, "has more that one field", pattern, " I take the first one"))
285 }
286 line <- lines[occurences[1]]
287
288 # Cut beginning and end of the line '##$TD= 65536' -> '65536'
289 igual = as.numeric(regexpr("=", line))
290
291 first <- igual
292 while (first <= nchar(line) & !isDigit(substr(line, first, first))) {
293 first <- first + 1
294 }
295 last <- nchar(line)
296 while (last > 0 & !isDigit(substr(line, last, last))) {
297 last <- last - 1
298 }
299 params[paramName] <- as.numeric(substr(line, first, last))
300 }
301 return(params)
302 }
303
304
305
306 # ReadFids ==============================================================================
307
308 ReadFids <- function(path, l = 1, subdirs = FALSE, dirs.names = FALSE) {
309
310 # Data initialisation and checks ----------------------------------------------
311 begin_info <- beginTreatment("ReadFids")
312 checkArg(path, c("str"))
313 checkArg(l, c("pos"))
314 if (file.exists(path) == FALSE) {
315 stop(paste("Invalid path:", path))
316 }
317
318
319 # Extract the FIDs and their info ----------------------------------------------
320
321 if (subdirs == FALSE) {
322 fidDirs <- getDirsContainingFid(path)
323 n <- length(fidDirs)
324 if (n == 0L) {
325 stop(paste("No valid fid in", path))
326 }
327 if (dirs.names) {
328 separator <- .Platform$file.sep
329 path_elem <- strsplit(fidDirs,separator)
330 fidNames <- sapply(path_elem, function(x) x[[length(path_elem[[1]])]])
331 }else {fidNames <- sapply(X = fidDirs, FUN = getTitle, l = l, subdirs = subdirs, USE.NAMES = F)}
332
333 for (i in 1:n) {
334 fidList <- ReadFid(fidDirs[i])
335 fid <- fidList[["fid"]]
336 info <- fidList[["params"]]
337 m <- length(fid)
338 if (i == 1) {
339 Fid_data <- matrix(nrow = n, ncol = m, dimnames = list(fidNames,
340 info[["DT"]] * (0:(m - 1))))
341 Fid_info <- matrix(nrow = n, ncol = length(info), dimnames = list(fidNames,
342 names(info)))
343 }
344 Fid_data[i, ] <- fid
345 Fid_info[i, ] <- unlist(info)
346 }
347
348 } else {
349 maindirs <- dir(path, full.names = TRUE) # subdirectories
350 Fid_data <- numeric()
351 Fid_info <- numeric()
352
353 fidDirs <- c()
354 for (j in maindirs) {
355 fd <- getDirsContainingFid(j) # recoved FIDs from subdirectories
356 n <- length(fd)
357 if (n > 0L) {
358 fidDirs <- c(fidDirs, fd)
359 } else {warning(paste("No valid fid in",j ))}
360 }
361
362 if (dirs.names==TRUE) {
363 if (length(fidDirs)!= length(dir(path))) { # at least one subdir contains more than 1 FID
364 separator <- .Platform$file.sep
365 path_elem <- strsplit(fidDirs,separator)
366 fidNames <- sapply(path_elem, function(x) paste(x[[length(path_elem[[1]])-1]],
367 x[[length(path_elem[[1]])]], sep = "_"))
368 }else {fidNames <- dir(path)}
369
370 } else {fidNames <- sapply(X = fidDirs, FUN = getTitle, l = l, subdirs = subdirs, USE.NAMES = F)}
371
372 for (i in 1:length(fidNames)) {
373 fidList <- ReadFid(fidDirs[i])
374 fid <- fidList[["fid"]]
375 info <- fidList[["params"]]
376 m <- length(fid)
377 if (i == 1) {
378 Fid_data <- matrix(nrow = length(fidNames), ncol = m, dimnames = list(fidNames,
379 info[["DT"]] * (0:(m - 1))))
380 Fid_info <- matrix(nrow = length(fidNames), ncol = length(info), dimnames = list(fidNames,
381 names(info)))
382 }
383 Fid_data[i, ] <- fid
384 Fid_info[i, ] <- unlist(info)
385 }
386
387
388 }
389
390 # Check for non-unique IDs ----------------------------------------------
391 NonnuniqueIds <- sum(duplicated(row.names(Fid_data)))
392 cat("dim Fid_data: ", dim(Fid_data), "\n")
393 cat("IDs: ", rownames(Fid_data), "\n")
394 cat("non-unique IDs?", NonnuniqueIds, "\n")
395 if (NonnuniqueIds > 0) {
396 warning("There are duplicated IDs: ", Fid_data[duplicated(Fid_data)])
397 }
398
399
400 # Return the results ----------------------------------------------
401 return(list(Fid_data = endTreatment("ReadFids", begin_info, Fid_data), Fid_info = Fid_info))
402
403 }
404