comparison msdb-common.R @ 5:fb9c0409d85c draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
author prog
date Wed, 19 Apr 2017 10:00:05 -0400
parents 20d69a062da3
children
comparison
equal deleted inserted replaced
4:b34c14151f25 5:fb9c0409d85c
7 ############# 7 #############
8 # CONSTANTS # 8 # CONSTANTS #
9 ############# 9 #############
10 10
# Field tag constants, shown as a two-column comparison: the left column
# (revision 4) binds each MSDB.TAG.* name to a BIODB.* constant, the right
# column (revision 5) binds the same name to a string literal.
11 # Field tags 11 # Field tags
12 MSDB.TAG.MZ <- BIODB.PEAK.MZ 12 MSDB.TAG.MZ <- 'mz'
13 MSDB.TAG.MZEXP <- BIODB.PEAK.MZEXP 13 MSDB.TAG.MZEXP <- 'mzexp'
14 MSDB.TAG.MZTHEO <- BIODB.PEAK.MZTHEO 14 MSDB.TAG.MZTHEO <- 'mztheo'
15 MSDB.TAG.RT <- BIODB.PEAK.RT 15 MSDB.TAG.RT <- 'rt'
16 MSDB.TAG.MODE <- BIODB.MSMODE 16 MSDB.TAG.MODE <- 'msmode'
17 MSDB.TAG.MOLID <- BIODB.COMPOUND.ID 17 MSDB.TAG.MOLID <- 'compoundid'
18 MSDB.TAG.COL <- BIODB.CHROM.COL 18 MSDB.TAG.COL <- 'chromcol'
19 MSDB.TAG.COLRT <- BIODB.CHROM.COL.RT 19 MSDB.TAG.COLRT <- 'chromcolrt'
20 MSDB.TAG.ATTR <- BIODB.PEAK.ATTR 20 MSDB.TAG.ATTR <- 'peakattr'
21 MSDB.TAG.INT <- BIODB.PEAK.INTENSITY 21 MSDB.TAG.INT <- 'intensity'
22 MSDB.TAG.REL <- BIODB.PEAK.RELATIVE.INTENSITY 22 MSDB.TAG.REL <- 'relative.intensity'
23 MSDB.TAG.COMP <- BIODB.PEAK.COMP 23 MSDB.TAG.COMP <- 'peakcomp'
24 MSDB.TAG.MOLNAMES <- BIODB.FULLNAMES 24 MSDB.TAG.MOLNAMES <- 'fullnames'
# NOTE(review): in both columns MOLCOMP is bound to the compound *mass* value
# and MOLMASS (next lines) to the compound *comp* value. The pairing looks
# swapped -- confirm against the code that reads these tags before changing it.
25 MSDB.TAG.MOLCOMP <- BIODB.COMPOUND.MASS 25 MSDB.TAG.MOLCOMP <- 'compoundmass'
26 # MSDB.TAG.MOLATTR <- 'molattr' 26 MSDB.TAG.MOLMASS <- 'compoundcomp'
27 MSDB.TAG.MOLMASS <- BIODB.COMPOUND.COMP 27 MSDB.TAG.INCHI <- 'inchi'
28 MSDB.TAG.INCHI <- BIODB.INCHI 28 MSDB.TAG.INCHIKEY <- 'inchikey'
29 MSDB.TAG.INCHIKEY <- BIODB.INCHIKEY 29 MSDB.TAG.PUBCHEM <- 'pubchemcompid'
30 # TODO Use BIODB tags. 30 MSDB.TAG.CHEBI <- 'chebiid'
31 MSDB.TAG.PUBCHEM <- BIODB.PUBCHEMCOMP.ID 31 MSDB.TAG.HMDB <- 'hmdbid'
32 MSDB.TAG.CHEBI <- BIODB.CHEBI.ID 32 MSDB.TAG.KEGG <- 'keggid'
33 MSDB.TAG.HMDB <- BIODB.HMDB.ID
34 MSDB.TAG.KEGG <- BIODB.KEGG.ID
35 33
# Mode tag constants (left column: BIODB.* constants, right column: literals).
# NOTE(review): in both columns MSDB.TAG.POS receives the NEG value
# (BIODB.MSMODE.NEG / 'neg') and MSDB.TAG.NEG receives the POS value
# (BIODB.MSMODE.POS / 'pos'). This looks inverted -- confirm whether the swap
# is intentional before relying on the literal values.
36 # Mode tags 34 # Mode tags
37 MSDB.TAG.POS <- BIODB.MSMODE.NEG 35 MSDB.TAG.POS <- 'neg'
38 MSDB.TAG.NEG <- BIODB.MSMODE.POS 36 MSDB.TAG.NEG <- 'pos'
39 37
# Multi-value field constants, identical in both columns: the list of field
# tags flagged as multi-valued (only the molecule-names tag) and the ';'
# separator constant declared alongside it.
40 # Fields containing multiple values 38 # Fields containing multiple values
41 MSDB.MULTIVAL.FIELDS <- c(MSDB.TAG.MOLNAMES) 39 MSDB.MULTIVAL.FIELDS <- c(MSDB.TAG.MOLNAMES)
42 MSDB.MULTIVAL.FIELD.SEP <- ';' 40 MSDB.MULTIVAL.FIELD.SEP <- ';'
43 41
# m/z tolerance unit names, identical in both columns: 'ppm' and 'plain',
# gathered into MSDB.MZTOLUNIT.VALS as the set of accepted values.
44 # Authorized mz tolerance unit values 42 # Authorized mz tolerance unit values
45 MSDB.MZTOLUNIT.PPM <- 'ppm' 43 MSDB.MZTOLUNIT.PPM <- 'ppm'
46 MSDB.MZTOLUNIT.PLAIN <- 'plain' # same as mz: mass-to-charge ratio 44 MSDB.MZTOLUNIT.PLAIN <- 'plain' # same as mz: mass-to-charge ratio
47 MSDB.MZTOLUNIT.VALS <- c(MSDB.MZTOLUNIT.PPM, MSDB.MZTOLUNIT.PLAIN) 45 MSDB.MZTOLUNIT.VALS <- c(MSDB.MZTOLUNIT.PPM, MSDB.MZTOLUNIT.PLAIN)
48 46
# Retention-time unit names. These lines carry only right-column numbering,
# i.e. they are present in revision 5 and have no counterpart in revision 4.
# 'sec' and 'min' are gathered into MSDB.RTUNIT.VALS as the accepted values.
47 # Authorized rt units
48 MSDB.RTUNIT.SEC <- 'sec'
49 MSDB.RTUNIT.MIN <- 'min'
50 MSDB.RTUNIT.VALS <- c(MSDB.RTUNIT.SEC ,MSDB.RTUNIT.MIN)
51
# Default values. MSDB.DFT.PREC is a list indexed by the two mode tag
# constants; each entry is a character vector of precursor strings.
49 # Default values 52 # Default values
50 MSDB.DFT.PREC <- list() 53 MSDB.DFT.PREC <- list()
51 MSDB.DFT.PREC[[MSDB.TAG.POS]] <- c("[(M+H)]+", "[M+H]+", "[(M+Na)]+", "[M+Na]+", "[(M+K)]+", "[M+K]+") 54 MSDB.DFT.PREC[[MSDB.TAG.POS]] <- c("[(M+H)]+", "[M+H]+", "[(M+Na)]+", "[M+Na]+", "[(M+K)]+", "[M+K]+")
52 MSDB.DFT.PREC[[MSDB.TAG.NEG]] <- c("[(M-H)]-", "[M-H]-", "[(M+Cl)]-", "[M+Cl]-") 55 MSDB.DFT.PREC[[MSDB.TAG.NEG]] <- c("[(M-H)]-", "[M-H]-", "[(M+Cl)]-", "[M+Cl]-")
# The next line carries only a left-column number: MSDB.DFT.OUTPUT.FIELDS
# exists in revision 4 and has no counterpart in revision 5.
53 MSDB.DFT.OUTPUT.FIELDS <- list( mz = 'mz', rt = 'rt', col = 'col', colrt = 'colrt', molid = 'id', attr = 'attribution', comp = 'composition', int = 'intensity', rel = 'relative', mzexp = 'mzexp', mztheo = 'mztheo', msmatching = 'msmatching', molnames = 'molnames', molcomp = 'molcomp', molmass = 'molmass', inchi = 'inchi', inchikey = 'inchikey', pubchem = 'pubchem', chebi = 'chebi', hmdb = 'hmdb', kegg = 'kegg')
54 MSDB.DFT.OUTPUT.MULTIVAL.FIELD.SEP <- MSDB.MULTIVAL.FIELD.SEP 56 MSDB.DFT.OUTPUT.MULTIVAL.FIELD.SEP <- MSDB.MULTIVAL.FIELD.SEP
55 MSDB.DFT.MATCH.FIELDS <- list( molids = 'molid', molnames = 'molnames') 57 MSDB.DFT.MATCH.FIELDS <- list( molids = 'molid', molnames = 'molnames')
56 MSDB.DFT.MATCH.SEP <- ',' 58 MSDB.DFT.MATCH.SEP <- ','
57 MSDB.DFT.MODES <- list( pos = 'POS', neg = 'NEG') 59 MSDB.DFT.MODES <- list( pos = 'POS', neg = 'NEG')
58 MSDB.DFT.MZTOLUNIT <- MSDB.MZTOLUNIT.PPM 60 MSDB.DFT.MZTOLUNIT <- MSDB.MZTOLUNIT.PPM
69 dft.fields[[f]] <- f 71 dft.fields[[f]] <- f
70 72
71 return(dft.fields) 73 return(dft.fields)
72 } 74 }
73 75
# This function carries only left-column numbering: it is present in
# revision 4 and has no counterpart in revision 5 of the comparison.
74 #############################
75 # GET DEFAULT OUTPUT FIELDS #
76 #############################
77
# Takes no arguments. Returns a list in which each listed field tag value is
# used both as the element name and as the element value.
78 msdb.get.dft.output.fields <- function () {
79
80 dft.fields <- list()
81
# Identity mapping: the tag string f is stored under its own name.
82 for(f in c(MSDB.TAG.MZ, MSDB.TAG.RT, MSDB.TAG.COL, MSDB.TAG.COLRT, MSDB.TAG.MOLID, MSDB.TAG.ATTR, MSDB.TAG.COMP, MSDB.TAG.INT, MSDB.TAG.REL, MSDB.TAG.MZEXP, MSDB.TAG.MZTHEO, MSDB.TAG.MOLNAMES, MSDB.TAG.MOLCOMP, MSDB.TAG.MOLMASS, MSDB.TAG.INCHI, MSDB.TAG.INCHIKEY, MSDB.TAG.PUBCHEM, MSDB.TAG.CHEBI, MSDB.TAG.HMDB, MSDB.TAG.KEGG))
83 dft.fields[[f]] <- f
84
85 return(dft.fields)
86 }
87
88 ######################### 76 #########################
89 # GET DEFAULT DB FIELDS # 77 # GET DEFAULT DB FIELDS #
90 ######################### 78 #########################
91 79
92 msdb.get.dft.db.fields <- function () { 80 msdb.get.dft.db.fields <- function () {
116 104
117 ######################### 105 #########################
118 # MAKE INPUT DATA FRAME # 106 # MAKE INPUT DATA FRAME #
119 ######################### 107 #########################
120 108
121 msdb.make.input.df <- function(mz, rt = NULL) { 109 msdb.make.input.df <- function(mz, rt = NULL, rtunit = MSDB.RTUNIT.SEC) {
122 110
123 field <- msdb.get.dft.input.fields() 111 field <- msdb.get.dft.input.fields()
124 112
125 x <- data.frame() 113 x <- data.frame()
126 114
132 else 120 else
133 x[, field[[MSDB.TAG.MZ]]] <- numeric() 121 x[, field[[MSDB.TAG.MZ]]] <- numeric()
134 122
135 # Set rt 123 # Set rt
136 if ( ! is.null(rt)) { 124 if ( ! is.null(rt)) {
125 if (rtunit == MSDB.RTUNIT.MIN)
126 rtunit <- rtunit * 60
137 if (length(rt) > 1) 127 if (length(rt) > 1)
138 x[seq(rt), field[[MSDB.TAG.RT]]] <- rt 128 x[seq(rt), field[[MSDB.TAG.RT]]] <- rt
139 else if (length(rt) == 1) 129 else if (length(rt) == 1)
140 x[1, field[[MSDB.TAG.RT]]] <- rt 130 x[1, field[[MSDB.TAG.RT]]] <- rt
141 else 131 else