Mercurial > repos > prog > lcmsmatching
view PubchemConn.R @ 2:20d69a062da3 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author | prog |
---|---|
date | Thu, 02 Mar 2017 08:55:00 -0500 |
parents | 253d531a0193 |
children |
line wrap: on
line source
##################### # CLASS DECLARATION # ##################### PubchemConn <- methods::setRefClass("PubchemConn", contains = "RemotedbConn", fields = list( .db = "character" )) ############### # CONSTRUCTOR # ############### PubchemConn$methods( initialize = function(db = BIODB.PUBCHEMCOMP, ...) { .db <<- db callSuper(...) }) ########################## # GET ENTRY CONTENT TYPE # ########################## PubchemConn$methods( getEntryContentType = function() { return(BIODB.XML) }) ##################### # GET ENTRY CONTENT # ##################### PubchemConn$methods( getEntryContent = function(ids) { # Debug .self$.print.debug.msg(paste0("Get entry content(s) for ", length(ids)," id(s)...")) URL.MAX.LENGTH <- 2083 # Initialize return values content <- rep(NA_character_, length(ids)) # Loop on all n <- 0 while (n < length(ids)) { # Get list of accession ids to retrieve accessions <- ids[(n + 1):length(ids)] # Create URL request x <- get.entry.url(class = .self$.db, accession = accessions, content.type = BIODB.XML, max.length = URL.MAX.LENGTH) # Debug .self$.print.debug.msg(paste0("Send URL request for ", x$n," id(s)...")) # Send request xmlstr <- .self$.get.url(x$url) # Increase number of entries retrieved n <- n + x$n # TODO When one of the id is wrong, no content is returned. Only a single error is returned, with the first faulty ID: # <Fault xmlns="http://pubchem.ncbi.nlm.nih.gov/pug_rest" xmlns:xs="http://www.w3.org/2001/XMLSchema-instance" xs:schemaLocation="http://pubchem.ncbi.nlm.nih.gov/pug_rest https://pubchem.ncbi.nlm.nih.gov/pug_rest/pug_rest.xsd"> # <Code>PUGREST.NotFound</Code> # <Message>Record not found</Message> # <Details>No record data for CID 1246452553</Details> # </Fault> # Parse XML and get included XML if ( ! is.na(xmlstr)) { xml <- xmlInternalTreeParse(xmlstr, asText = TRUE) ns <- c(pcns = "http://www.ncbi.nlm.nih.gov") returned.ids <- xpathSApply(xml, paste0("//pcns:", if (.self$.db == BIODB.PUBCHEMCOMP) 'PC-CompoundType_id_cid' else 'PC-ID_id'), xmlValue, namespaces = ns) content[match(returned.ids, ids)] <- vapply(getNodeSet(xml, paste0("//pcns:", if (.self$.db == BIODB.PUBCHEMCOMP) "PC-Compound" else 'PC-Substance'), namespaces = ns), saveXML, FUN.VALUE = '') } # Debug .self$.print.debug.msg(paste0("Now ", length(ids) - n," id(s) left to be retrieved...")) } return(content) }) ################ # CREATE ENTRY # ################ PubchemConn$methods( createEntry = function(content, drop = TRUE) { return(if (.self$.db == BIODB.PUBCHEMCOMP) createPubchemEntryFromXml(content, drop = drop) else createPubchemSubstanceFromXml(content, drop = drop)) }) ######################### # GET PUBCHEM IMAGE URL # ######################### get.pubchem.image.url <- function(id, db = BIODB.PUBCHEMCOMP) { url <- paste0('http://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?', (if (db == BIODB.PUBCHEMCOMP) 'cid' else 'sid'), '=', id, '&t=l') return(url) }