diff MsPeakForestDb.R @ 4:b34c14151f25 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 494194bb501d1d7033613131865f7bd68976041c
author prog
date Tue, 14 Mar 2017 12:40:22 -0400
parents f61ce21ed17c
children fb9c0409d85c
line wrap: on
line diff
--- a/MsPeakForestDb.R	Thu Mar 02 11:07:56 2017 -0500
+++ b/MsPeakForestDb.R	Tue Mar 14 12:40:22 2017 -0400
@@ -259,15 +259,27 @@
 		spectra <- .self$.get.url(url = url)
 
 		# Build result data frame
-		results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character())
+		results <- data.frame(MSDB.TAG.MOLID = character(), MSDB.TAG.MOLNAMES = character(), MSDB.TAG.MOLMASS = numeric(), MSDB.TAG.MZTHEO = numeric(), MSDB.TAG.COMP = character(), MSDB.TAG.ATTR = character(), MSDB.TAG.INCHI = character(), MSDB.TAG.INCHIKEY = character(), MSDB.TAG.CHEBI = character(), MSDB.TAG.HMDB = character(), MSDB.TAG.KEGG = character(), MSDB.TAG.PUBCHEM = character())
 		for (x in spectra) {
 			if ('source' %in% names(x) && is.list(x$source))
-				results <- rbind(results, data.frame(MSDB.TAG.MOLID = vapply(x$source$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = ''),
-				                                     MSDB.TAG.MOLNAMES = vapply(x$source$listOfCompounds, function(c) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP), FUN.VALUE = ''),
-													 MSDB.TAG.MZTHEO = as.numeric(x$theoricalMass),
-													 MSDB.TAG.COMP = as.character(x$composition),
-													 MSDB.TAG.ATTR = as.character(x$attribution),
-													 stringsAsFactors = FALSE))
+				mztheo <- if ('theoricalMass' %in% names(x)) as.numeric(x$theoricalMass) else NA_real_
+				comp <- if ('composition' %in% names(x)) x$composition else NA_character_
+				attr <- if ('attribution' %in% names(x)) x$attribution else NA_character_
+				if ('listOfCompounds' %in% names(x$source)) {
+					molids <- vapply(x$source$listOfCompounds, function(c) as.character(c$id), FUN.VALUE = '')
+					molnames <- vapply(x$source$listOfCompounds, function(c) paste(c$names, collapse = MSDB.MULTIVAL.FIELD.SEP), FUN.VALUE = '')
+					mass <- vapply(x$source$listOfCompounds, function(c) as.character(c$averageMass), FUN.VALUE = '')
+					inchi <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChI), FUN.VALUE = '')
+					inchikey <- vapply(x$source$listOfCompounds, function(c) as.character(c$inChIKey), FUN.VALUE = '')
+					chebi <- vapply(x$source$listOfCompounds, function(c) as.character(c$ChEBI), FUN.VALUE = '')
+					chebi[chebi == 'CHEBI:null'] <- NA_character_
+					hmdb <- vapply(x$source$listOfCompounds, function(c) as.character(c$HMDB), FUN.VALUE = '')
+					hmdb[hmdb == 'HMDBnull'] <- NA_character_
+					kegg <- vapply(x$source$listOfCompounds, function(c) as.character(c$KEGG), FUN.VALUE = '')
+					pubchem <- vapply(x$source$listOfCompounds, function(c) as.character(c$PubChemCID), FUN.VALUE = '')
+					if (length(molids) > 0 && length(molids) == length(molnames))
+						results <- rbind(results, data.frame(MSDB.TAG.MOLID = molids, MSDB.TAG.MOLNAMES = molnames, MSDB.TAG.MOLMASS = mass, MSDB.TAG.MZTHEO = mztheo, MSDB.TAG.COMP = comp, MSDB.TAG.ATTR = attr, MSDB.TAG.INCHI = inchi, MSDB.TAG.INCHIKEY = inchikey, MSDB.TAG.CHEBI = chebi, MSDB.TAG.HMDB = hmdb, MSDB.TAG.KEGG = kegg, MSDB.TAG.PUBCHEM = pubchem, stringsAsFactors = FALSE))
+				}
 		}
 
 		# RT search