comparison PubchemConn.R @ 2:20d69a062da3 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author prog
date Thu, 02 Mar 2017 08:55:00 -0500
parents 253d531a0193
children
comparison
equal deleted inserted replaced
1:253d531a0193 2:20d69a062da3
1 if ( ! exists('get.pubchem.compound.url')) { # Do not load again if already loaded 1 #####################
2 # CLASS DECLARATION #
3 #####################
2 4
3 source('RemotedbConn.R') 5 PubchemConn <- methods::setRefClass("PubchemConn", contains = "RemotedbConn", fields = list( .db = "character" ))
4 source('PubchemCompound.R')
5
6 #####################
7 # CLASS DECLARATION #
8 #####################
9
10 PubchemConn <- setRefClass("PubchemConn", contains = "RemotedbConn")
11 6
12 ########################## 7 ###############
13 # GET ENTRY CONTENT TYPE # 8 # CONSTRUCTOR #
14 ########################## 9 ###############
15 10
16 PubchemConn$methods( getEntryContentType = function(type) { 11 PubchemConn$methods( initialize = function(db = BIODB.PUBCHEMCOMP, ...) {
17 return(BIODB.XML) 12 .db <<- db
18 }) 13 callSuper(...)
14 })
19 15
20 ##################### 16 ##########################
21 # GET ENTRY CONTENT # 17 # GET ENTRY CONTENT TYPE #
22 ##################### 18 ##########################
23
24 PubchemConn$methods( getEntryContent = function(type, id) {
25 19
26 if (type == BIODB.COMPOUND) { 20 PubchemConn$methods( getEntryContentType = function() {
21 return(BIODB.XML)
22 })
27 23
28 # Initialize return values 24 #####################
29 content <- rep(NA_character_, length(id)) 25 # GET ENTRY CONTENT #
26 #####################
30 27
31 # Request 28 PubchemConn$methods( getEntryContent = function(ids) {
32 content <- vapply(id, function(x) .self$.scheduler$getUrl(get.entry.url(BIODB.PUBCHEM, x, content.type = BIODB.XML)), FUN.VALUE = '')
33 29
34 return(content) 30 # Debug
31 .self$.print.debug.msg(paste0("Get entry content(s) for ", length(ids)," id(s)..."))
32
33 URL.MAX.LENGTH <- 2083
34
35 # Initialize return values
36 content <- rep(NA_character_, length(ids))
37
38 # Loop on all
39 n <- 0
40 while (n < length(ids)) {
41
42 # Get list of accession ids to retrieve
43 accessions <- ids[(n + 1):length(ids)]
44
45 # Create URL request
46 x <- get.entry.url(class = .self$.db, accession = accessions, content.type = BIODB.XML, max.length = URL.MAX.LENGTH)
47
48 # Debug
49 .self$.print.debug.msg(paste0("Send URL request for ", x$n," id(s)..."))
50
51 # Send request
52 xmlstr <- .self$.get.url(x$url)
53
54 # Increase number of entries retrieved
55 n <- n + x$n
56
57 # TODO When one of the IDs is wrong, no content is returned at all. Only a single error is returned, naming the first faulty ID:
58 # <Fault xmlns="http://pubchem.ncbi.nlm.nih.gov/pug_rest" xmlns:xs="http://www.w3.org/2001/XMLSchema-instance" xs:schemaLocation="http://pubchem.ncbi.nlm.nih.gov/pug_rest https://pubchem.ncbi.nlm.nih.gov/pug_rest/pug_rest.xsd">
59 # <Code>PUGREST.NotFound</Code>
60 # <Message>Record not found</Message>
61 # <Details>No record data for CID 1246452553</Details>
62 # </Fault>
63
64 # Parse XML and get included XML
65 if ( ! is.na(xmlstr)) {
66 xml <- xmlInternalTreeParse(xmlstr, asText = TRUE)
67 ns <- c(pcns = "http://www.ncbi.nlm.nih.gov")
68 returned.ids <- xpathSApply(xml, paste0("//pcns:", if (.self$.db == BIODB.PUBCHEMCOMP) 'PC-CompoundType_id_cid' else 'PC-ID_id'), xmlValue, namespaces = ns)
69 content[match(returned.ids, ids)] <- vapply(getNodeSet(xml, paste0("//pcns:", if (.self$.db == BIODB.PUBCHEMCOMP) "PC-Compound" else 'PC-Substance'), namespaces = ns), saveXML, FUN.VALUE = '')
35 } 70 }
36 71
37 return(NULL) 72 # Debug
38 }) 73 .self$.print.debug.msg(paste0("Now ", length(ids) - n," id(s) left to be retrieved..."))
39 74 }
40 ################
41 # CREATE ENTRY #
42 ################
43
44 PubchemConn$methods( createEntry = function(type, content, drop = TRUE) {
45 return(if (type == BIODB.COMPOUND) createPubchemCompoundFromXml(content, drop = drop) else NULL)
46 })
47 75
48 ######################### 76 return(content)
49 # GET PUBCHEM IMAGE URL # 77 })
50 #########################
51
52 get.pubchem.image.url <- function(id) {
53
54 url <- paste0('http://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?cid=', id, '&t=l')
55 78
56 return(url) 79 ################
57 } 80 # CREATE ENTRY #
58 81 ################
59 } # end of load safe guard 82
PubchemConn$methods( createEntry = function(content, drop = TRUE) {

  # Build entry object(s) from XML content.
  #   content: XML content forwarded unchanged to the factory function.
  #   drop:    forwarded unchanged to the factory function.
  # The factory is selected by the database stored in the .db field:
  # the compound database uses createPubchemEntryFromXml(), any other
  # value uses createPubchemSubstanceFromXml().
  if (.self$.db == BIODB.PUBCHEMCOMP) {
    return(createPubchemEntryFromXml(content, drop = drop))
  }

  return(createPubchemSubstanceFromXml(content, drop = drop))
})
86
87 #########################
88 # GET PUBCHEM IMAGE URL #
89 #########################
90
get.pubchem.image.url <- function(id, db = BIODB.PUBCHEMCOMP) {

  # Build the URL of the PubChem image service for the given identifier(s).
  #   id: identifier(s) pasted into the query string; paste0() yields one
  #       URL per element.
  #   db: database selector; when equal to BIODB.PUBCHEMCOMP the query
  #       parameter is 'cid', for any other value it is 'sid'.
  # Returns the URL(s) as a character vector.
  id.param <- if (db == BIODB.PUBCHEMCOMP) 'cid' else 'sid'

  paste0('http://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?', id.param, '=', id, '&t=l')
}