comparison KeggCompound.R @ 0:e66bb061af06 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
author prog
date Tue, 12 Jul 2016 12:02:37 -0400
parents
children 253d531a0193
comparison
equal deleted inserted replaced
-1:000000000000 0:e66bb061af06
1 if ( ! exists('KeggCompound')) { # Do not load again if already loaded
2
3 source('BiodbEntry.R')
4
5 #####################
6 # CLASS DECLARATION #
7 #####################
8
# Reference class representing one KEGG compound entry. It inherits from
# BiodbEntry and declares no additional fields or methods at this point.
KeggCompound <- setRefClass(Class = "KeggCompound", contains = "BiodbEntry")
10
11 ###########
12 # FACTORY #
13 ###########
14
# Build KeggCompound objects from KEGG flat-file text entries.
#
# contents: vector or list of entry texts, one entry per element; the lines
#           of an entry are separated by "\n" (or "\r\n").
# drop:     when TRUE and `contents` holds exactly one element, the single
#           result (a KeggCompound or NULL) is returned instead of a list.
#
# Returns a list with one slot per element of `contents`. A slot holds a
# KeggCompound, or NULL when no accession could be extracted from the text.
createKeggCompoundFromTxt <- function(contents, drop = TRUE) {

  library(stringr)

  # Fields that are filled directly from the first capture group of a regex.
  regex <- character()
  regex[[RBIODB.NAME]] <- "^NAME\\s+([^,;]+)"
  regex[[RBIODB.CHEBI.ID]] <- "^\\s+ChEBI:\\s+(\\S+)"
  regex[[RBIODB.LIPIDMAPS.ID]] <- "^\\s+LIPIDMAPS:\\s+(\\S+)"

  # Parse a single entry text. Returns the populated KeggCompound, or NULL
  # when the entry ends up without an accession.
  parseEntry <- function(text) {

    compound <- KeggCompound$new()

    # A NULL / zero-length element has no lines at all; indexing the result
    # of strsplit() with [[1]] would fail on it, so short-circuit here.
    # "\r?\n" also strips the carriage return of CRLF line endings, which
    # would otherwise end up inside the captured NAME value.
    lines <- if (length(text) == 0) character() else strsplit(text, "\r?\n")[[1]]

    for (s in lines) {

      # Generic fields: the first matching regex wins and ends the
      # processing of this line.
      parsed <- FALSE
      for (field in names(regex)) {
        g <- str_match(s, regex[[field]])
        if ( ! is.na(g[1, 1])) {
          compound$setField(field, g[1, 2])
          parsed <- TRUE
          break
        }
      }
      if (parsed)
        next

      # ACCESSION, from most specific to least specific ENTRY layout.
      g <- str_match(s, "^ENTRY\\s+EC\\s+(\\S+)")
      if ( ! is.na(g[1, 1])) {
        # Enzyme ID
        compound$setField(RBIODB.ACCESSION, paste('ec', g[1, 2], sep = ':'))
      } else {
        g <- str_match(s, "^ENTRY\\s+(\\S+)\\s+Compound")
        if ( ! is.na(g[1, 1])) {
          # Compound ID
          compound$setField(RBIODB.ACCESSION, paste('cpd', g[1, 2], sep = ':'))
        } else {
          # Any other kind of ID is stored without a prefix.
          g <- str_match(s, "^ENTRY\\s+(\\S+)")
          if ( ! is.na(g[1, 1]))
            compound$setField(RBIODB.ACCESSION, g[1, 2])
        }
      }

      # ORGANISM: prefix the accession already read with the organism code.
      g <- str_match(s, "^ORGANISM\\s+(\\S+)")
      if ( ! is.na(g[1, 1])) {
        accession <- compound$getField(RBIODB.ACCESSION)
        # Without this guard a missing accession would become the string
        # "<organism>:NA", which is not NA and would therefore survive the
        # "no accession" filter below.
        if ( ! is.na(accession))
          compound$setField(RBIODB.ACCESSION, paste(g[1, 2], accession, sep = ':'))
      }
    }

    # Entries with no accession id are reported as NULL.
    if (is.na(compound$getField(RBIODB.ACCESSION))) NULL else compound
  }

  # Preallocate one slot per input element instead of growing the list with
  # c() on every iteration; slots left untouched stay NULL.
  compounds <- vector("list", length(contents))
  i <- 0
  for (text in contents) {
    i <- i + 1
    entry <- parseEntry(text)
    if ( ! is.null(entry))
      compounds[[i]] <- entry
  }

  # If the input was a single element, then output a single object
  if (drop && length(contents) == 1)
    compounds <- compounds[[1]]

  compounds
}
88 }