comparison ChemspiderEntry.R @ 2:20d69a062da3 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author prog
date Thu, 02 Mar 2017 08:55:00 -0500
parents
children
comparison
equal deleted inserted replaced
1:253d531a0193 2:20d69a062da3
1 #####################
2 # CLASS DECLARATION #
3 #####################
4
# Reference class for a single ChemSpider entry. No fields or methods are
# declared here: everything is inherited from the parent class "BiodbEntry",
# which is defined outside this file.
ChemspiderEntry <- methods::setRefClass(
  Class = "ChemspiderEntry",
  contains = "BiodbEntry"
)
6
7 ############################
8 # CREATE COMPOUND FROM XML #
9 ############################
10
# Build ChemspiderEntry objects from ChemSpider XML content.
#
# contents: a vector or list of XML documents given as text, one per entry.
#           Elements that are NULL, NA or the literal string 'NA' are not
#           parsed.
# drop:     if TRUE and `contents` has exactly one element, the single result
#           is returned directly instead of a one-element list.
#
# Returns a list with one slot per element of `contents`. A slot holds a
# ChemspiderEntry when an accession could be read for it, and NULL otherwise.
# With `drop = TRUE` and a single input, returns that slot's value (an entry
# or NULL).
createChemspiderEntryFromXml <- function(contents, drop = TRUE) {

  # Entry field -> xpath expression locating its value in the XML document
  xpath.expr <- character()
  xpath.expr[[BIODB.ACCESSION]] <- "//CSID"
  xpath.expr[[BIODB.FORMULA]] <- "//MF"
  xpath.expr[[BIODB.NAME]] <- "//CommonName"
  xpath.expr[[BIODB.AVERAGE.MASS]] <- "//AverageMass"
  xpath.expr[[BIODB.INCHI]] <- "//InChI"
  xpath.expr[[BIODB.INCHIKEY]] <- "//InChIKey"
  xpath.expr[[BIODB.SMILES]] <- "//SMILES"

  # Preallocate one slot per input instead of growing the list with c() on
  # every iteration. Slots start as NULL and stay NULL for entries that end
  # up without an accession.
  entries <- vector("list", length(contents))

  for (i in seq_along(contents)) {

    content <- contents[[i]]

    # Create instance
    entry <- ChemspiderEntry$new()

    if ( ! is.null(content) && ! is.na(content) && content != 'NA') {

      # Parse XML
      xml <- XML::xmlInternalTreeParse(content, asText = TRUE)

      # Copy every field whose xpath expression matches at least one node
      for (field in names(xpath.expr)) {
        v <- XML::xpathSApply(xml, xpath.expr[[field]], XML::xmlValue)
        if (length(v) > 0)
          entry$setField(field, v)
      }
    }

    # Keep the entry only if it has an accession id
    if ( ! is.na(entry$getField(BIODB.ACCESSION)))
      entries[[i]] <- entry
  }

  # If the input was a single element, then output a single object
  if (drop && length(contents) == 1)
    entries <- entries[[1]]

  return(entries)
}
55
56 #############################
57 # CREATE COMPOUND FROM HTML #
58 #############################
59
# Build ChemspiderEntry objects from ChemSpider HTML pages.
#
# contents: a vector or list of HTML documents given as text, one per entry.
#           Elements that are NULL, NA or the literal string 'NA' are not
#           parsed.
# drop:     if TRUE and `contents` has exactly one element, the single result
#           is returned directly instead of a one-element list.
#
# Returns a list with one slot per element of `contents`. A slot holds a
# ChemspiderEntry when an accession could be read for it, and NULL otherwise.
# With `drop = TRUE` and a single input, returns that slot's value (an entry
# or NULL).
createChemspiderEntryFromHtml <- function(contents, drop = TRUE) {

  # Entry field -> xpath expression table. It is currently empty, so the
  # generic extraction loop below performs no iteration and only the
  # accession is read from the page.
  xpath.expr <- character()

  # Preallocate one slot per input instead of growing the list with c() on
  # every iteration. Slots start as NULL and stay NULL for entries that end
  # up without an accession.
  entries <- vector("list", length(contents))

  for (i in seq_along(contents)) {

    content <- contents[[i]]

    # Create instance
    entry <- ChemspiderEntry$new()

    # Same guard as createChemspiderEntryFromXml: the literal string 'NA' is
    # treated as missing content and is not parsed.
    if ( ! is.null(content) && ! is.na(content) && content != 'NA') {

      # Parse HTML
      xml <- XML::htmlTreeParse(content, asText = TRUE, useInternalNodes = TRUE)

      # Copy every field whose xpath expression matches at least one node
      for (field in names(xpath.expr)) {
        v <- XML::xpathSApply(xml, xpath.expr[[field]], XML::xmlValue)
        if (length(v) > 0)
          entry$setField(field, v)
      }

      # Get accession from the <li> element whose text starts with
      # 'ChemSpider ID'
      accession <- XML::xpathSApply(xml, "//li[starts-with(., 'ChemSpider ID')]", XML::xmlValue)
      if (length(accession) > 0) {
        # Strip the label and keep the digits. sub() returns its input
        # unchanged when the pattern does not match, so in that case the
        # whole <li> text is stored as the accession.
        accession <- sub('^ChemSpider ID([0-9]+)$', '\\1', accession, perl = TRUE)
        entry$setField(BIODB.ACCESSION, accession)
      }
    }

    # Keep the entry only if it has an accession id
    if ( ! is.na(entry$getField(BIODB.ACCESSION)))
      entries[[i]] <- entry
  }

  # If the input was a single element, then output a single object
  if (drop && length(contents) == 1)
    entries <- entries[[1]]

  return(entries)
}