Mercurial > repos > prog > lcmsmatching
annotate ChemspiderCompound.R @ 1:253d531a0193 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 36c9d8099c20a1ae848f1337c16564335dd8fb2b
author | prog |
---|---|
date | Sat, 03 Sep 2016 17:02:01 -0400 |
parents | e66bb061af06 |
children |
rev | line source |
---|---|
0
e66bb061af06
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
prog
parents:
diff
changeset
|
if ( ! exists('ChemspiderCompound')) { # Do not load again if already loaded

  source('BiodbEntry.R')

  #####################
  # CLASS DECLARATION #
  #####################

  # Entry class for ChemSpider compounds. It adds no field or method of its
  # own: everything is inherited from BiodbEntry.
  ChemspiderCompound <- setRefClass("ChemspiderCompound", contains = "BiodbEntry")

  ###########
  # FACTORY #
  ###########

  # Build ChemspiderCompound instances from ChemSpider HTML pages.
  #
  # contents  Vector or list of HTML page contents, one page per element.
  #           NULL or NA elements are treated as missing pages.
  # drop      If TRUE and `contents` has exactly one element, return the
  #           single result itself instead of a list of length one.
  #
  # Returns a list with one element per input page, in input order. An
  # element is NULL when no accession number could be set for that page.
  # With `drop = TRUE` and a single input page, returns either the
  # ChemspiderCompound instance or NULL.
  createChemspiderCompoundFromHtml <- function(contents, drop = TRUE) {

    library(XML)

    # Generic xpath expressions, as a named character vector (field name ->
    # xpath expression). None is defined for now, so only the accession
    # number is extracted.
    xpath.expr <- character()

    # The <li> text is trimmed before matching, and only labels that match
    # this pattern are accepted. sub() returns its input unchanged when the
    # pattern does not match, which would otherwise store the raw label as
    # the accession number.
    accession.pattern <- '^ChemSpider ID\\s*([0-9]+)$'

    # Preallocate the output instead of growing it at each iteration.
    compounds <- vector("list", length(contents))

    for (i in seq_along(contents)) {

      html <- contents[[i]]

      # Create instance
      compound <- ChemspiderCompound$new()

      # A missing page yields an instance without accession number, which is
      # replaced by NULL in the filtering step below.
      missing.page <- is.null(html) || (length(html) == 1 && is.na(html))

      if ( ! missing.page) {

        # Parse HTML
        xml <- htmlTreeParse(html, asText = TRUE, useInternalNodes = TRUE)

        # Test generic xpath expressions
        for (field in names(xpath.expr)) {
          v <- xpathSApply(xml, xpath.expr[[field]], xmlValue)
          if (length(v) > 0)
            compound$setField(field, v)
        }

        # Get accession. xpathSApply() returns an empty list when nothing
        # matches, hence the unlist()/as.character() normalisation.
        labels <- xpathSApply(xml, "//li[starts-with(., 'ChemSpider ID')]", xmlValue)
        labels <- gsub('^\\s+|\\s+$', '', as.character(unlist(labels)), perl = TRUE)
        labels <- labels[grepl(accession.pattern, labels, perl = TRUE)]

        # Several <li> nodes may match: keep the first one so that the
        # accession is always a single value.
        if (length(labels) > 0) {
          accession <- sub(accession.pattern, '\\1', labels[[1]], perl = TRUE)
          compound$setField(BIODB.ACCESSION, accession)
        }
      }

      compounds[[i]] <- compound
    }

    # Replace elements with no accession id by NULL. all() keeps the
    # condition a single logical whatever the length of the field value.
    compounds <- lapply(compounds, function(x) {
      accession <- x$getField(BIODB.ACCESSION)
      if (length(accession) == 0 || all(is.na(accession))) NULL else x
    })

    # If the input was a single element, then output a single object
    if (drop && length(contents) == 1)
      compounds <- compounds[[1]]

    return(compounds)
  }
}
e66bb061af06
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
prog
parents:
diff
changeset
|
59 |