diff PeakforestEntry.R @ 2:20d69a062da3 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author prog
date Thu, 02 Mar 2017 08:55:00 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PeakforestEntry.R	Thu Mar 02 08:55:00 2017 -0500
@@ -0,0 +1,250 @@
+#####################
+# CLASS DECLARATION #
+#####################
+
+# TODO Create class PeakforestCompoundEntry
+PeakForestSpectrumEntry <- methods::setRefClass("PeakForestSpectrumEntry", contains = "BiodbEntry")
+
+PeakForestCompoundEntry <- methods::setRefClass("PeakForestCompoundEntry", contains = "BiodbEntry")
+
+
+###########
+# FACTORY #
+###########
+
+
+###Arg is jcontent ot indicate that the content is already a json.
+createPeakforestCompoundFromJSON <- function(contents, drop = FALSE) {
+	
+	if(is.character(contents))
+		contents <- jsonlite::fromJSON(contents, simplifyDataFrame=FALSE)
+	
+	jsonfields <- list()
+	jsonfields[[BIODB.ACCESSION]] <- "id"
+	jsonfields[[BIODB.PUBCHEMCOMP.ID]] <- "PubChemCID"
+	jsonfields[[BIODB.CHEBI.ID]] <- "ChEBI"
+	jsonfields[[BIODB.HMDB.ID]] <- "HMDB"
+	jsonfields[[BIODB.KEGG.ID]] <- "KEGG"
+	jsonfields[[BIODB.FORMULA]] <- "formula"
+	jsonfields[[BIODB.SMILES]] <- "canSmiles"
+	jsonfields[[BIODB.AVERAGE.MASS]] <- "averageMass"
+	jsonfields[[BIODB.MONOISOTOPIC.MASS]] <- "monoisotopicMass"
+	jsonfields[[BIODB.INCHI]] <- "inChI"
+	jsonfields[[BIODB.INCHIKEY]] <- "inchiIKey"
+	jsonfields[[BIODB.NAME]] <- "mainName"
+
+	entries <- vector(length(contents),mode="list")
+	
+	for (i in seq_along(contents)){
+		
+		jsontree <- contents[[i]]
+		entry <- PeakForestCompoundEntry$new()
+		
+		
+		for(field in names(jsonfields)){
+			
+			tosearch <- jsonfields[[field]]
+			value <- jsontree$tosearch
+			entry$setField(field,value)
+		}
+		
+		entries[[i]] <- entry
+	}
+	
+	
+	if (drop && length(contents) == 1)
+		entries <- entries[[1]]
+	
+	entries
+}
+
+createPeakforestSpectraFromJSON <- function(contents, drop = FALSE, checkSub = TRUE) {
+	
+	entries <- vector(length(contents),mode="list")
+	jsonfields <- character()
+	jsonfields[[BIODB.ACCESSION]] <- "id" # TODO Use BIODB.ACCESSION instead
+	jsonfields[[BIODB.MSMODE]] <- "polarity"
+	
+	
+	###Checking that it's a list.
+	if(length(contents) == 1){
+		if(startsWith(contents[[1]], "<html>") ){
+			return(NULL)
+		}else{
+			contents <- jsonlite::fromJSON(contents[[1]],simplifyDataFrame=FALSE)	
+			
+		}
+	}
+	
+	for (i in seq_along(contents)){
+		
+		content <- contents[[i]]
+		jsontree <- NULL
+		if(typeof(content) == "character"){
+			if(startsWith(content, "<html>")|content=="null"){
+				entries[[i]] <- NULL
+				next
+			}
+			jsontree <- jsonlite::fromJSON(content,simplifyDataFrame=FALSE)
+		}else{
+			jsontree <- content
+		}
+		cnames <- c(BIODB.PEAK.MZ, BIODB.PEAK.RELATIVE.INTENSITY, BIODB.PEAK.FORMULA, BIODB.PEAK.MZTHEO, BIODB.PEAK.ERROR.PPM)
+		
+		entry <- PeakForestSpectrumEntry$new()
+		#####Setting thz mass analyzer
+		entry$setField(BIODB.MSDEV,jsontree$analyzerMassSpectrometerDevice$instrumentName)
+		entry$setField(BIODB.MSDEVTYPE,jsontree$analyzerMassSpectrometerDevice$ionAnalyzerType)	
+		
+		
+		
+		for(field in names(jsonfields)){
+			
+			tosearch <- jsonfields[[field]]
+			value <- jsontree$tosearch
+			entry$setField(field,value)
+		}
+		
+		######################
+		# TREATING THE PEAKS #
+		######################
+		
+		entry$setField(BIODB.NB.PEAKS,length(jsontree$peaks))
+		peaks <- data.frame( matrix( 0,ncol = length(cnames), nrow = 0))
+		colnames(peaks) <- cnames
+		###Parsing peaks.
+		if(length(jsontree$peaks) != 0){
+			peaks <- sapply(jsontree$peaks,function(x){
+				return(list(as.double(x$mz),
+							as.integer(x$ri),
+							as.character(x$composition),
+							as.double(x$theoricalMass),
+							as.double(x$deltaPPM)
+				))
+			})
+			###Removing all whitespaces from the formule.
+			peaks[3,]<-vapply(peaks[3,],function(x){
+				gsub(" ","",trimws(x))
+			},FUN.VALUE = NA_character_)
+			
+			peaks<-t(peaks)
+			colnames(peaks)<-cnames
+		}
+		
+		entry$setField(BIODB.PEAKS,peaks)
+		
+		##################################
+		# TREATING THE LIST OF COMPOUNDS #
+		##################################
+		
+		entry$setField(BIODB.NB.COMPOUNDS,length(jsontree$listOfCompounds))
+		compounds <- list()
+		
+		###Parsing compounds.
+		if( length( jsontree$listOfCompounds) != 0){
+			compounds <- lapply( jsontree$listOfCompounds, function(x){
+				createPeakforestCompoundFromJSON(x)
+			})
+		}
+		
+		entry$setField(BIODB.COMPOUNDS, compounds)
+		
+		
+		entries[[i]] <- entry
+	}
+	
+	
+	if (drop && length(contents) == 1)
+		entries <- entries[[1]]
+	
+	entries
+}
+
+
+####TDO CLEAN THIS
+
+createReducedSpectraFromJSON <- function(contents,
+			 drop = FALSE,
+			 checkSub = TRUE) {
+	entries <- vector(length(contents), mode = "list")
+	jsonfields <- character()
+	# jsonfields[[BIODB.ACCESSION]] <-
+	# 	"id" # TODO Use BIODB.ACCESSION instead
+	
+	
+	###Checking that it's a list.
+	if (length(contents) == 1) {
+		if (startsWith(contents[[1]], "<html>")) {
+			return(NULL)
+		} else{
+			contents <- jsonlite::fromJSON(contents[[1]], simplifyDataFrame=FALSE)
+			
+		}
+	}
+	
+	for (i in seq_along(contents)) {
+		content <- contents[[i]]
+		jsontree <- NULL
+		if (typeof(content) == "character") {
+			if (startsWith(content, "<html>") | content == "null") {
+				entries[[i]] <- NULL
+				next
+			}
+			jsontree <- jsonlite::fromJSON(content, simplifyDataFrame=FALSE)
+		} else{
+			jsontree <- content
+		}
+		
+		
+		cnames <-
+			c(
+				BIODB.PEAK.MZ,
+				BIODB.PEAK.RELATIVE.INTENSITY,
+				BIODB.PEAK.FORMULA,
+				BIODB.PEAK.MZTHEO,
+				BIODB.PEAK.ERROR.PPM
+			)
+		
+		entry <- PeakForestSpectrumEntry$new()
+		entry$setField(BIODB.ACCESSION, jsontree$id)
+		
+		######################
+		# TREATING THE PEAKS #
+		######################
+		
+		entry$setField(BIODB.NB.PEAKS, length(jsontree$peaks))
+		peaks <- data.frame(matrix(0, ncol = length(cnames), nrow = 0))
+		colnames(peaks) <- cnames
+		###Parsing peaks.
+		if (length(jsontree$peaks) != 0) {
+			peaks <- sapply(jsontree$peaks, function(x) {
+				return(
+					list(
+						as.double(x$mz),
+						as.integer(x$ri),
+						as.character(x$composition),
+						as.double(x$theoricalMass),
+						as.double(x$deltaPPM)
+					)
+				)
+			})
+			###Removing all whitespaces from the formule.
+			peaks[3, ] <- vapply(peaks[3, ], function(x) {
+				gsub(" ", "", trimws(x))
+			}, FUN.VALUE = NA_character_)
+			
+			peaks <- as.data.frame(t(peaks))
+			colnames(peaks) <- cnames
+		}
+		
+		entry$setField(BIODB.PEAKS, peaks)
+		
+		entries[[i]] <- entry
+	}
+	
+	
+	if (drop && length(contents) == 1)
+		entries <- entries[[1]]
+	
+	entries
+}