annotate PubchemConn.R @ 2:20d69a062da3 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author prog
date Thu, 02 Mar 2017 08:55:00 -0500
parents 253d531a0193
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
1 #####################
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
2 # CLASS DECLARATION #
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
3 #####################
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
4
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
5 PubchemConn <- methods::setRefClass("PubchemConn", contains = "RemotedbConn", fields = list( .db = "character" ))
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
6
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
7 ###############
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
8 # CONSTRUCTOR #
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
9 ###############
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
10
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
11 PubchemConn$methods( initialize = function(db = BIODB.PUBCHEMCOMP, ...) {
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
12 .db <<- db
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
13 callSuper(...)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
14 })
0
e66bb061af06 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
prog
parents:
diff changeset
15
2
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
16 ##########################
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
17 # GET ENTRY CONTENT TYPE #
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
18 ##########################
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
19
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
20 PubchemConn$methods( getEntryContentType = function() {
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
21 return(BIODB.XML)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
22 })
0
e66bb061af06 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
prog
parents:
diff changeset
23
2
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
24 #####################
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
25 # GET ENTRY CONTENT #
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
26 #####################
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
27
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
28 PubchemConn$methods( getEntryContent = function(ids) {
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
29
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
30 # Debug
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
31 .self$.print.debug.msg(paste0("Get entry content(s) for ", length(ids)," id(s)..."))
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
32
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
33 URL.MAX.LENGTH <- 2083
0
e66bb061af06 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
prog
parents:
diff changeset
34
2
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
35 # Initialize return values
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
36 content <- rep(NA_character_, length(ids))
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
37
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
38 # Loop on all
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
39 n <- 0
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
40 while (n < length(ids)) {
0
e66bb061af06 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
prog
parents:
diff changeset
41
2
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
42 # Get list of accession ids to retrieve
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
43 accessions <- ids[(n + 1):length(ids)]
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
44
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
45 # Create URL request
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
46 x <- get.entry.url(class = .self$.db, accession = accessions, content.type = BIODB.XML, max.length = URL.MAX.LENGTH)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
47
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
48 # Debug
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
49 .self$.print.debug.msg(paste0("Send URL request for ", x$n," id(s)..."))
0
e66bb061af06 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
prog
parents:
diff changeset
50
2
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
51 # Send request
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
52 xmlstr <- .self$.get.url(x$url)
0
e66bb061af06 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
prog
parents:
diff changeset
53
2
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
54 # Increase number of entries retrieved
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
55 n <- n + x$n
0
e66bb061af06 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
prog
parents:
diff changeset
56
2
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
57 # TODO When one of the id is wrong, no content is returned. Only a single error is returned, with the first faulty ID:
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
58 # <Fault xmlns="http://pubchem.ncbi.nlm.nih.gov/pug_rest" xmlns:xs="http://www.w3.org/2001/XMLSchema-instance" xs:schemaLocation="http://pubchem.ncbi.nlm.nih.gov/pug_rest https://pubchem.ncbi.nlm.nih.gov/pug_rest/pug_rest.xsd">
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
59 # <Code>PUGREST.NotFound</Code>
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
60 # <Message>Record not found</Message>
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
61 # <Details>No record data for CID 1246452553</Details>
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
62 # </Fault>
0
e66bb061af06 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
prog
parents:
diff changeset
63
2
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
64 # Parse XML and get included XML
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
65 if ( ! is.na(xmlstr)) {
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
66 xml <- xmlInternalTreeParse(xmlstr, asText = TRUE)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
67 ns <- c(pcns = "http://www.ncbi.nlm.nih.gov")
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
68 returned.ids <- xpathSApply(xml, paste0("//pcns:", if (.self$.db == BIODB.PUBCHEMCOMP) 'PC-CompoundType_id_cid' else 'PC-ID_id'), xmlValue, namespaces = ns)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
69 content[match(returned.ids, ids)] <- vapply(getNodeSet(xml, paste0("//pcns:", if (.self$.db == BIODB.PUBCHEMCOMP) "PC-Compound" else 'PC-Substance'), namespaces = ns), saveXML, FUN.VALUE = '')
0
e66bb061af06 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
prog
parents:
diff changeset
70 }
e66bb061af06 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
prog
parents:
diff changeset
71
2
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
72 # Debug
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
73 .self$.print.debug.msg(paste0("Now ", length(ids) - n," id(s) left to be retrieved..."))
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
74 }
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
75
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
76 return(content)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
77 })
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
78
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
79 ################
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
80 # CREATE ENTRY #
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
81 ################
0
e66bb061af06 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
prog
parents:
diff changeset
82
2
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
83 PubchemConn$methods( createEntry = function(content, drop = TRUE) {
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
84 return(if (.self$.db == BIODB.PUBCHEMCOMP) createPubchemEntryFromXml(content, drop = drop) else createPubchemSubstanceFromXml(content, drop = drop))
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
85 })
0
e66bb061af06 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
prog
parents:
diff changeset
86
2
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
87 #########################
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
88 # GET PUBCHEM IMAGE URL #
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
89 #########################
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
90
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
91 get.pubchem.image.url <- function(id, db = BIODB.PUBCHEMCOMP) {
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
92
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
93 url <- paste0('http://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?', (if (db == BIODB.PUBCHEMCOMP) 'cid' else 'sid'), '=', id, '&t=l')
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
94
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
95 return(url)
20d69a062da3 planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
prog
parents: 1
diff changeset
96 }