Mercurial > repos > prog > lcmsmatching
diff PeakforestEntry.R @ 2:20d69a062da3 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author | prog |
---|---|
date | Thu, 02 Mar 2017 08:55:00 -0500 |
parents | |
children |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/PeakforestEntry.R Thu Mar 02 08:55:00 2017 -0500
# @@ -0,0 +1,250 @@

#####################
# CLASS DECLARATION #
#####################

# Reference classes for PeakForest spectrum and compound entries. Both extend
# BiodbEntry, which is declared elsewhere in the project.
PeakForestSpectrumEntry <- methods::setRefClass("PeakForestSpectrumEntry", contains = "BiodbEntry")

PeakForestCompoundEntry <- methods::setRefClass("PeakForestCompoundEntry", contains = "BiodbEntry")


###########
# HELPERS #
###########

# Copy values from a parsed JSON tree into an entry.
#
# entry      Entry object exposing setField(field, value).
# jsontree   Parsed JSON tree (named list) for a single record.
# jsonfields Named list/character vector: names are entry field names, values
#            are the JSON keys to read them from.
#
# The JSON key is held in a variable, so it has to be looked up with `[[`;
# `jsontree$key` would search for an element literally called "key".
# A key that is absent from the tree yields NULL, which is passed to setField()
# unchanged.
.peakforestSetJsonFields <- function(entry, jsontree, jsonfields) {
  for (field in names(jsonfields)) {
    key <- jsonfields[[field]]
    entry$setField(field, jsontree[[key]])
  }
  invisible(entry)
}

# TRUE when `content` is a string that cannot be parsed into a record: either
# an HTML page (starts with "<html>") or the JSON literal "null".
.peakforestIsErrorContent <- function(content) {
  is.character(content) &&
    (startsWith(content, "<html>") || content == "null")
}

# Build the peak table from the `peaks` element of a parsed spectrum.
#
# jsonpeaks   List of parsed peaks; each peak is read through its `mz`, `ri`,
#             `composition`, `theoricalMass` and `deltaPPM` elements.
# cnames      Column names to give to the five columns, in that order.
# asDataFrame If TRUE the parsed peaks are converted with as.data.frame(),
#             otherwise they are returned as a matrix of mode list.
#
# With no peaks, an empty data.frame with the requested columns is returned.
# All whitespace is stripped from the formula (`composition`) of each peak.
.peakforestParsePeaks <- function(jsonpeaks, cnames, asDataFrame = FALSE) {
  if (length(jsonpeaks) == 0) {
    peaks <- data.frame(matrix(0, ncol = length(cnames), nrow = 0))
    colnames(peaks) <- cnames
    return(peaks)
  }

  # One column per peak, one row per attribute; vapply() guarantees the
  # 5 x n shape whatever the number of peaks.
  peaks <- vapply(jsonpeaks, function(x) {
    list(
      as.double(x$mz),
      as.integer(x$ri),
      gsub(" ", "", trimws(as.character(x$composition))),
      as.double(x$theoricalMass),
      as.double(x$deltaPPM)
    )
  }, FUN.VALUE = vector("list", 5L))

  peaks <- t(peaks)
  if (asDataFrame) {
    peaks <- as.data.frame(peaks)
  }
  colnames(peaks) <- cnames
  peaks
}


###########
# FACTORY #
###########

# Create PeakForestCompoundEntry objects from PeakForest compound JSON.
#
# contents Either a JSON string (parsed here with jsonlite::fromJSON) or an
#          already parsed list with one element per compound.
# drop     If TRUE and there is exactly one element, return the entry itself
#          instead of a list of length one.
#
# Returns a list of entries, one per element of `contents`.
createPeakforestCompoundFromJSON <- function(contents, drop = FALSE) {

  if (is.character(contents)) {
    contents <- jsonlite::fromJSON(contents, simplifyDataFrame = FALSE)
  }

  # Entry field -> JSON key.
  jsonfields <- list()
  jsonfields[[BIODB.ACCESSION]] <- "id"
  jsonfields[[BIODB.PUBCHEMCOMP.ID]] <- "PubChemCID"
  jsonfields[[BIODB.CHEBI.ID]] <- "ChEBI"
  jsonfields[[BIODB.HMDB.ID]] <- "HMDB"
  jsonfields[[BIODB.KEGG.ID]] <- "KEGG"
  jsonfields[[BIODB.FORMULA]] <- "formula"
  jsonfields[[BIODB.SMILES]] <- "canSmiles"
  jsonfields[[BIODB.AVERAGE.MASS]] <- "averageMass"
  jsonfields[[BIODB.MONOISOTOPIC.MASS]] <- "monoisotopicMass"
  jsonfields[[BIODB.INCHI]] <- "inChI"
  # NOTE(review): "inchiIKey" looks like a misspelt JSON key - confirm against
  # the PeakForest API before changing it.
  jsonfields[[BIODB.INCHIKEY]] <- "inchiIKey"
  jsonfields[[BIODB.NAME]] <- "mainName"

  entries <- vector("list", length(contents))

  for (i in seq_along(contents)) {
    entry <- PeakForestCompoundEntry$new()
    .peakforestSetJsonFields(entry, contents[[i]], jsonfields)
    entries[[i]] <- entry
  }

  if (drop && length(contents) == 1) {
    entries <- entries[[1]]
  }

  entries
}

# Create PeakForestSpectrumEntry objects (device, polarity, peaks and
# compounds) from PeakForest spectra JSON.
#
# contents A list/vector with one element per spectrum, each either a JSON
#          string or an already parsed tree. When it holds a single string,
#          that string is parsed and its elements are used as the spectra.
# drop     If TRUE and there is exactly one (decoded) element, return the
#          entry itself instead of a list.
# checkSub Currently unused; kept for interface compatibility.
#
# Returns NULL when the single content is an HTML page. Otherwise returns a
# list of entries in which elements that are an HTML page or "null" are left
# as NULL, so that positions still match `contents`.
createPeakforestSpectraFromJSON <- function(contents, drop = FALSE, checkSub = TRUE) {

  entries <- vector("list", length(contents))

  # Entry field -> JSON key.
  jsonfields <- character()
  jsonfields[[BIODB.ACCESSION]] <- "id"
  jsonfields[[BIODB.MSMODE]] <- "polarity"

  cnames <- c(
    BIODB.PEAK.MZ,
    BIODB.PEAK.RELATIVE.INTENSITY,
    BIODB.PEAK.FORMULA,
    BIODB.PEAK.MZTHEO,
    BIODB.PEAK.ERROR.PPM
  )

  # A single string is a whole response: decode it into its elements. A single
  # already parsed tree is left alone and handled by the loop below.
  if (length(contents) == 1 && is.character(contents[[1]])) {
    if (startsWith(contents[[1]], "<html>")) {
      return(NULL)
    }
    contents <- jsonlite::fromJSON(contents[[1]], simplifyDataFrame = FALSE)
    # Keep one slot per decoded element.
    length(entries) <- max(length(entries), length(contents))
  }

  for (i in seq_along(contents)) {

    content <- contents[[i]]

    # Leave the slot empty. Assigning NULL with `[[<-` would delete the slot
    # and shorten the result.
    if (.peakforestIsErrorContent(content)) {
      next
    }

    jsontree <- if (is.character(content)) {
      jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
    } else {
      content
    }

    entry <- PeakForestSpectrumEntry$new()

    # Mass analyzer.
    device <- jsontree$analyzerMassSpectrometerDevice
    entry$setField(BIODB.MSDEV, device$instrumentName)
    entry$setField(BIODB.MSDEVTYPE, device$ionAnalyzerType)

    .peakforestSetJsonFields(entry, jsontree, jsonfields)

    ######################
    # TREATING THE PEAKS #
    ######################

    # NOTE(review): peaks are stored as a matrix here but as a data.frame in
    # createReducedSpectraFromJSON - confirm whether callers depend on that.
    entry$setField(BIODB.NB.PEAKS, length(jsontree$peaks))
    entry$setField(BIODB.PEAKS, .peakforestParsePeaks(jsontree$peaks, cnames))

    ##################################
    # TREATING THE LIST OF COMPOUNDS #
    ##################################

    entry$setField(BIODB.NB.COMPOUNDS, length(jsontree$listOfCompounds))

    # NOTE(review): each element is passed as-is to the compound factory, which
    # iterates over the elements of its argument - confirm that this matches
    # the shape of `listOfCompounds` returned by PeakForest.
    compounds <- list()
    if (length(jsontree$listOfCompounds) != 0) {
      compounds <- lapply(jsontree$listOfCompounds, function(x) {
        createPeakforestCompoundFromJSON(x)
      })
    }
    entry$setField(BIODB.COMPOUNDS, compounds)

    entries[[i]] <- entry
  }

  if (drop && length(contents) == 1) {
    entries <- entries[[1]]
  }

  entries
}


# TODO Merge with createPeakforestSpectraFromJSON.

# Create reduced PeakForestSpectrumEntry objects (accession and peaks only)
# from PeakForest spectra JSON.
#
# contents A list/vector with one element per spectrum, each either a JSON
#          string or an already parsed tree. When it holds a single string,
#          that string is parsed and its elements are used as the spectra.
# drop     If TRUE and there is exactly one (decoded) element, return the
#          entry itself instead of a list.
# checkSub Currently unused; kept for interface compatibility.
#
# Returns NULL when the single content is an HTML page. Otherwise returns a
# list of entries in which elements that are an HTML page or "null" are left
# as NULL, so that positions still match `contents`.
createReducedSpectraFromJSON <- function(contents,
                                         drop = FALSE,
                                         checkSub = TRUE) {
  entries <- vector("list", length(contents))

  cnames <- c(
    BIODB.PEAK.MZ,
    BIODB.PEAK.RELATIVE.INTENSITY,
    BIODB.PEAK.FORMULA,
    BIODB.PEAK.MZTHEO,
    BIODB.PEAK.ERROR.PPM
  )

  # A single string is a whole response: decode it into its elements. A single
  # already parsed tree is left alone and handled by the loop below.
  if (length(contents) == 1 && is.character(contents[[1]])) {
    if (startsWith(contents[[1]], "<html>")) {
      return(NULL)
    }
    contents <- jsonlite::fromJSON(contents[[1]], simplifyDataFrame = FALSE)
    # Keep one slot per decoded element.
    length(entries) <- max(length(entries), length(contents))
  }

  for (i in seq_along(contents)) {
    content <- contents[[i]]

    # Leave the slot empty. Assigning NULL with `[[<-` would delete the slot
    # and shorten the result.
    if (.peakforestIsErrorContent(content)) {
      next
    }

    jsontree <- if (is.character(content)) {
      jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
    } else {
      content
    }

    entry <- PeakForestSpectrumEntry$new()
    entry$setField(BIODB.ACCESSION, jsontree$id)

    ######################
    # TREATING THE PEAKS #
    ######################

    entry$setField(BIODB.NB.PEAKS, length(jsontree$peaks))
    entry$setField(
      BIODB.PEAKS,
      .peakforestParsePeaks(jsontree$peaks, cnames, asDataFrame = TRUE)
    )

    entries[[i]] <- entry
  }

  if (drop && length(contents) == 1) {
    entries <- entries[[1]]
  }

  entries
}