Mercurial > repos > prog > lcmsmatching
view biodb-common.R @ 0:e66bb061af06 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
author | prog |
---|---|
date | Tue, 12 Jul 2016 12:02:37 -0400 |
parents | |
children | 253d531a0193 |
line wrap: on
line source
if ( ! exists('RBIODB.COMPOUND')) { # Do not load again if already loaded ############# # CONSTANTS # ############# # Entry types RBIODB.COMPOUND <- 'compound' RBIODB.SPECTRUM <- 'spectrum' # Entry content types RBIODB.HTML <- 'html' RBIODB.TXT <- 'txt' RBIODB.XML <- 'xml' RBIODB.CSV <- 'csv' RBIODB.ANY <- 'any' # Class names RBIODB.CHEBI <- 'chebi' RBIODB.KEGG <- 'kegg' RBIODB.PUBCHEM <- 'pubchem' RBIODB.HMDB <- 'hmdb' RBIODB.CHEMSPIDER <- 'chemspider' RBIODB.ENZYME <- 'enzyme' RBIODB.LIPIDMAPS <- 'lipidmaps' RBIODB.MIRBASE <- 'mirbase' RBIODB.NCBIGENE <- 'ncbigene' RBIODB.NCBICCDS <- 'ncbiccds' RBIODB.UNIPROT <- 'uniprot' RBIODB.MASSBANK <- 'massbank' # Fields RBIODB.COMPOUND <- 'compound' RBIODB.ACCESSION <- 'accession' RBIODB.DESCRIPTION <- 'description' RBIODB.PROTEIN.DESCRIPTION <- 'protdesc' RBIODB.NAME <- 'name' RBIODB.FULLNAMES <- 'fullnames' RBIODB.SYNONYMS <- 'synonyms' RBIODB.SYMBOL <- 'symbol' RBIODB.GENE.SYMBOLS <- 'genesymbols' RBIODB.CHEBI.ID <- 'chebiid' RBIODB.LIPIDMAPS.ID <- 'lipidmapsid' RBIODB.KEGG.ID <- 'keggid' RBIODB.HMDB.ID <- 'hmdbid' RBIODB.ENZYME.ID <- 'enzymeid' RBIODB.NCBI.CCDS.ID <- 'ncbiccdsid' RBIODB.NCBI.GENE.ID <- 'ncbigeneid' RBIODB.PUBCHEM.ID <- 'pubchemid' RBIODB.UNIPROT.ID <- 'uniprotid' RBIODB.INCHI <- 'inchi' RBIODB.INCHIKEY <- 'inchikey' RBIODB.MSDEV <- 'msdev' RBIODB.MSDEVTYPE <- 'msdevtype' RBIODB.MSTYPE <- 'mstype' RBIODB.MSMODE <- 'msmode' RBIODB.MSPRECMZ <- 'msprecmz' # numeric RBIODB.MSPRECANNOT <- 'msprecannot' RBIODB.FORMULA <- 'formula' RBIODB.SUPER.CLASS <- 'superclass' RBIODB.MASS <- 'mass' RBIODB.AVERAGE.MASS <- 'averagemass' RBIODB.MONOISOTOPIC.MASS <- 'monoisotopicmass' RBIODB.SEQUENCE <- 'sequence' RBIODB.LOCATION <- 'location' RBIODB.LENGTH <- 'length' RBIODB.NB.PEAKS <- 'nbpeaks' RBIODB.NB.PEAKS <- 'nbpeaks' RBIODB.PEAKS <- 'peaks' # Mode values RBIODB.MSMODE.NEG <- 'neg' RBIODB.MSMODE.POS <- 'pos' # Cardinalities RBIODB.CARD.ONE <- '1' RBIODB.CARD.MANY <- '*' # Field attributes RBIODB.FIELDS <- data.frame(matrix(c( # FIELD NAME CLASS CARDINALITY RBIODB.COMPOUND, 'BiodEntry', RBIODB.CARD.ONE, RBIODB.ACCESSION, 'character', RBIODB.CARD.ONE, RBIODB.DESCRIPTION, 'character', RBIODB.CARD.ONE, RBIODB.NAME, 'character', RBIODB.CARD.ONE, RBIODB.FULLNAMES, 'character', RBIODB.CARD.MANY, RBIODB.SYNONYMS, 'character', RBIODB.CARD.MANY, RBIODB.PROTEIN.DESCRIPTION, 'character', RBIODB.CARD.ONE, RBIODB.SYMBOL, 'character', RBIODB.CARD.ONE, RBIODB.GENE.SYMBOLS, 'character', RBIODB.CARD.MANY, RBIODB.CHEBI.ID, 'character', RBIODB.CARD.ONE, RBIODB.LIPIDMAPS.ID, 'character', RBIODB.CARD.ONE, RBIODB.KEGG.ID, 'character', RBIODB.CARD.ONE, RBIODB.HMDB.ID, 'character', RBIODB.CARD.ONE, RBIODB.ENZYME.ID, 'character', RBIODB.CARD.ONE, RBIODB.PUBCHEM.ID, 'character', RBIODB.CARD.ONE, RBIODB.UNIPROT.ID, 'character', RBIODB.CARD.ONE, RBIODB.NCBI.CCDS.ID, 'character', RBIODB.CARD.ONE, RBIODB.NCBI.GENE.ID, 'character', RBIODB.CARD.ONE, RBIODB.INCHI, 'character', RBIODB.CARD.ONE, RBIODB.INCHIKEY, 'character', RBIODB.CARD.ONE, RBIODB.MSDEV, 'character', RBIODB.CARD.ONE, RBIODB.MSDEVTYPE, 'character', RBIODB.CARD.ONE, RBIODB.MSTYPE, 'character', RBIODB.CARD.ONE, RBIODB.MSMODE, 'character', RBIODB.CARD.ONE, RBIODB.MSPRECMZ, 'double', RBIODB.CARD.ONE, RBIODB.MSPRECANNOT, 'character', RBIODB.CARD.ONE, RBIODB.FORMULA, 'character', RBIODB.CARD.ONE, RBIODB.SUPER.CLASS, 'character', RBIODB.CARD.ONE, RBIODB.MASS, 'double', RBIODB.CARD.ONE, RBIODB.AVERAGE.MASS, 'double', RBIODB.CARD.ONE, RBIODB.MONOISOTOPIC.MASS, 'double', RBIODB.CARD.ONE, RBIODB.SEQUENCE, 'character', RBIODB.CARD.ONE, RBIODB.LENGTH, 'integer', RBIODB.CARD.ONE, RBIODB.LOCATION, 'character', RBIODB.CARD.ONE, RBIODB.NB.PEAKS, 'integer', RBIODB.CARD.ONE, RBIODB.PEAKS, 'data.frame', RBIODB.CARD.ONE ), byrow = TRUE, ncol = 3), stringsAsFactors = FALSE) colnames(RBIODB.FIELDS) <- c('name', 'class', 'cardinality') # How to compute a missing field ? RBIODB.FIELD.COMPUTING <- list() RBIODB.FIELD.COMPUTING[[RBIODB.INCHI]] <- c(RBIODB.CHEBI) RBIODB.FIELD.COMPUTING[[RBIODB.INCHIKEY]] <- c(RBIODB.CHEBI) RBIODB.FIELD.COMPUTING[[RBIODB.SEQUENCE]] <- c(RBIODB.NCBICCDS) # Peaks data frame columns RBIODB.PEAK.MZ <- 'mz' RBIODB.PEAK.FORMULA <- 'formula' RBIODB.PEAK.FORMULA.COUNT <- 'formula.count' RBIODB.PEAK.MASS <- 'mass' RBIODB.PEAK.ERROR.PPM <- 'error.ppm' RBIODB.PEAK.INTENSITY <- 'intensity' RBIODB.PEAK.RELATIVE.INTENSITY <- 'relative.intensity' RBIODB.PEAK.DF.EXAMPLE <- data.frame(mz = double(), int = double(), rel.int = integer(), formula = character(), formula.count <- integer(), mass = double(), error = double(), stringsAsFactors = FALSE) colnames(RBIODB.PEAK.DF.EXAMPLE) <- c(RBIODB.PEAK.MZ, RBIODB.PEAK.INTENSITY, RBIODB.PEAK.RELATIVE.INTENSITY, RBIODB.PEAK.FORMULA, RBIODB.PEAK.FORMULA.COUNT, RBIODB.PEAK.MASS, RBIODB.PEAK.ERROR.PPM) ################# # GET ENTRY URL # ################# # TODO Let the choice to use either jp or eu RBIODB.MASSBANK.JP.WS.URL <- "http://www.massbank.jp/api/services/MassBankAPI/getRecordInfo" RBIODB.MASSBANK.EU.WS.URL <- "http://massbank.eu/api/services/MassBankAPI/getRecordInfo" get.entry.url <- function(class, accession, content.type = RBIODB.ANY) { url <- switch(class, chebi = if (content.type %in% c(RBIODB.ANY, RBIODB.HTML)) paste0('https://www.ebi.ac.uk/chebi/searchId.do?chebiId=', accession) else NULL, chemspider = if (content.type %in% c(RBIODB.ANY, RBIODB.HTML)) paste0('http://www.chemspider.com/Chemical-Structure.', accession, '.html') else NULL, enzyme = if (content.type %in% c(RBIODB.ANY, RBIODB.TXT)) paste0('http://enzyme.expasy.org/EC/', accession, '.txt') else NULL, hmdb = switch(content.type, xml = paste0('http://www.hmdb.ca/metabolites/', accession, '.xml'), html = paste0('http://www.hmdb.ca/metabolites/', accession), any = paste0('http://www.hmdb.ca/metabolites/', accession), NULL), kegg = switch(content.type, txt = paste0('http://rest.kegg.jp/get/', accession), html = paste0('http://www.genome.jp/dbget-bin/www_bget?cpd:', accession), any = paste0('http://www.genome.jp/dbget-bin/www_bget?cpd:', accession), NULL), lipidmaps = if (content.type %in% c(RBIODB.ANY, RBIODB.CSV)) paste0('http://www.lipidmaps.org/data/LMSDRecord.php?Mode=File&LMID=', accession, '&OutputType=CSV&OutputQuote=No') else NULL, massbank = if (content.type %in% c(RBIODB.ANY, RBIODB.TXT)) paste0(RBIODB.MASSBANK.EU.WS.URL, '?ids=', paste(accession, collapse = ',')) else NULL, mirbase = if (content.type %in% c(RBIODB.ANY, RBIODB.HTML)) paste0('http://www.mirbase.org/cgi-bin/mature.pl?mature_acc=', accession) else NULL, pubchem = { accession <- gsub(' ', '', accession, perl = TRUE) accession <- gsub('^CID', '', accession, perl = TRUE) switch(content.type, xml = paste0('http://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/', accession, '/XML/?response_type=save&response_basename=CID_', accession), html = paste0('http://pubchem.ncbi.nlm.nih.gov/compound/', accession), NULL) }, ncbigene = if (content.type %in% c(RBIODB.ANY, RBIODB.XML)) paste0('http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=', accession, '&rettype=xml&retmode=text') else NULL, ncbiccds = if (content.type %in% c(RBIODB.ANY, RBIODB.HTML)) paste0('https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&GO=MainBrowse&DATA=', accession), uniprot = if (content.type %in% c(RBIODB.ANY, RBIODB.XML)) paste0('http://www.uniprot.org/uniprot/', accession, '.xml'), NULL ) return(url) } }