lcmsmatching: PubchemConn.R comparison

comparison PubchemConn.R @ 2:20d69a062da3 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8

author	prog
date	Thu, 02 Mar 2017 08:55:00 -0500
parents	253d531a0193
children

comparison

equal deleted inserted replaced

-:253d531a0193
+:20d69a062da3
-if ( ! exists('get.pubchem.compound.url')) { # Do not load again if already loaded
+#####################
+# CLASS DECLARATION #
+#####################
-	source('RemotedbConn.R')
+PubchemConn <- methods::setRefClass("PubchemConn", contains = "RemotedbConn", fields = list( .db = "character" )) # Reference class extending RemotedbConn; the .db field holds the PubChem database identifier the methods use to pick URLs, XML node names and entry parsers.
-	source('PubchemCompound.R')
-	#####################
-	# CLASS DECLARATION #
-	#####################
-	PubchemConn <- setRefClass("PubchemConn", contains = "RemotedbConn")
-	##########################
+###############
-	# GET ENTRY CONTENT TYPE #
+# CONSTRUCTOR #
-	##########################
+###############
-	PubchemConn$methods( getEntryContentType = function(type) {
+PubchemConn$methods( initialize = function(db = BIODB.PUBCHEMCOMP, ...) { # Constructor: db selects the PubChem database (default BIODB.PUBCHEMCOMP); every other argument is forwarded to the parent initializer.
-		return(BIODB.XML)
+	.db <<- db # store the chosen database identifier in the .db field
-	})
+	callSuper(...) # forward the remaining arguments to the parent class initializer
+})
-	#####################
+##########################
-	# GET ENTRY CONTENT #
+# GET ENTRY CONTENT TYPE #
-	#####################
+##########################
-	PubchemConn$methods( getEntryContent = function(type, id) {
-		if (type == BIODB.COMPOUND) {
+PubchemConn$methods( getEntryContentType = function() { # Returns the content type of the entries served by this connection: always BIODB.XML.
+	return(BIODB.XML)
+})
-			# Initialize return values
+#####################
-			content <- rep(NA_character_, length(id))
+# GET ENTRY CONTENT #
+#####################
-			# Request
+PubchemConn$methods( getEntryContent = function(ids) { # Fetches the XML content of the entries listed in ids; returns a character vector of length(ids), left at NA for every id whose content was not retrieved.
-			content <- vapply(id, function(x) .self$.scheduler$getUrl(get.entry.url(BIODB.PUBCHEM, x, content.type = BIODB.XML)), FUN.VALUE = '')
-			return(content)
+	# Debug: log how many ids were requested
+	.self$.print.debug.msg(paste0("Get entry content(s) for ", length(ids)," id(s)..."))
+	URL.MAX.LENGTH <- 2083 # passed to get.entry.url as max.length; presumably caps the length of each request URL - TODO confirm
+	# Initialize return values: one slot per requested id, NA until its content is retrieved
+	content <- rep(NA_character_, length(ids))
+	# Loop until every id has been covered by a request; n counts the ids requested so far
+	n <- 0
+	while (n < length(ids)) {
+		# Get list of accession ids to retrieve: all the ids not yet requested
+		accessions <- ids[(n + 1):length(ids)]
+		# Create URL request; the result x carries the URL (x$url) and the number of ids that URL covers (x$n)
+		x <- get.entry.url(class = .self$.db, accession = accessions, content.type = BIODB.XML, max.length = URL.MAX.LENGTH)
+		# Debug
+		.self$.print.debug.msg(paste0("Send URL request for ", x$n," id(s)..."))
+		# Send request
+		xmlstr <- .self$.get.url(x$url)
+		# Increase number of entries retrieved
+		n <- n + x$n # NOTE(review): if x$n could ever be 0 this loop would never advance - confirm get.entry.url always returns n >= 1
+		# TODO When one of the ids is wrong, no content is returned at all. Only a single error is returned, naming the first faulty ID:
+#		<Fault xmlns="http://pubchem.ncbi.nlm.nih.gov/pug_rest" xmlns:xs="http://www.w3.org/2001/XMLSchema-instance" xs:schemaLocation="http://pubchem.ncbi.nlm.nih.gov/pug_rest https://pubchem.ncbi.nlm.nih.gov/pug_rest/pug_rest.xsd">
+#		<Code>PUGREST.NotFound</Code>
+#		<Message>Record not found</Message>
+#		<Details>No record data for CID 1246452553</Details>
+#		</Fault>
+		# Parse the XML answer and extract each returned entry as its own XML string (skipped when the request returned NA)
+		if ( ! is.na(xmlstr)) {
+			xml <-  xmlInternalTreeParse(xmlstr, asText = TRUE)
+			ns <- c(pcns = "http://www.ncbi.nlm.nih.gov")
+			returned.ids <- xpathSApply(xml, paste0("//pcns:", if (.self$.db == BIODB.PUBCHEMCOMP) 'PC-CompoundType_id_cid' else 'PC-ID_id'), xmlValue, namespaces = ns) # ids found in the answer; the id element name depends on .db
+			content[match(returned.ids, ids)] <- vapply(getNodeSet(xml, paste0("//pcns:", if (.self$.db == BIODB.PUBCHEMCOMP) "PC-Compound" else 'PC-Substance'), namespaces = ns), saveXML, FUN.VALUE = '') # store each entry's XML at the position of its id in ids; assumes ids and entry nodes come back in the same order - TODO confirm
 		}
-		return(NULL)
+		# Debug
-	})
+		.self$.print.debug.msg(paste0("Now ", length(ids) - n," id(s) left to be retrieved..."))
+	}
-	################
-	# CREATE ENTRY #
-	################
-	PubchemConn$methods( createEntry = function(type, content, drop = TRUE) {
-		return(if (type == BIODB.COMPOUND) createPubchemCompoundFromXml(content, drop = drop) else NULL)
-	})
-	#########################
+	return(content) # character vector aligned with ids
-	# GET PUBCHEM IMAGE URL #
+})
-	#########################
-	get.pubchem.image.url <- function(id) {
-		url <- paste0('http://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?cid=', id, '&t=l')
-		return(url)
+################
-	}
+# CREATE ENTRY #
+################
-} # end of load safe guard
+PubchemConn$methods( createEntry = function(content, drop = TRUE) { # Builds entry object(s) from XML content; the parser is chosen from the .db field and drop is forwarded to it unchanged.
+	return(if (.self$.db == BIODB.PUBCHEMCOMP) createPubchemEntryFromXml(content, drop = drop) else createPubchemSubstanceFromXml(content, drop = drop)) # BIODB.PUBCHEMCOMP -> createPubchemEntryFromXml, any other db -> createPubchemSubstanceFromXml
+})
+#########################
+# GET PUBCHEM IMAGE URL #
+#########################
+get.pubchem.image.url <- function(id, db = BIODB.PUBCHEMCOMP) { # Builds the PubChem image-service URL for the entry id; db selects which query key carries the id.
+	url <- paste0('http://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?', (if (db == BIODB.PUBCHEMCOMP) 'cid' else 'sid'), '=', id, '&t=l') # key is 'cid' for BIODB.PUBCHEMCOMP and 'sid' for any other db
+	return(url)
+}

Mercurial > repos > prog > lcmsmatching

comparison PubchemConn.R @ 2:20d69a062da3 draft