view PeakforestEntry.R @ 2:20d69a062da3 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author prog
date Thu, 02 Mar 2017 08:55:00 -0500
parents
children
line wrap: on
line source

#####################
# CLASS DECLARATION #
#####################

# Reference classes for the two kinds of PeakForest entries. Both extend
# BiodbEntry and declare no additional fields or methods here.
PeakForestSpectrumEntry <- methods::setRefClass(
	Class = "PeakForestSpectrumEntry",
	contains = "BiodbEntry"
)

PeakForestCompoundEntry <- methods::setRefClass(
	Class = "PeakForestCompoundEntry",
	contains = "BiodbEntry"
)


###########
# FACTORY #
###########


#' Build PeakForest compound entries from JSON content.
#'
#' @param contents Either a character JSON text (it is parsed with
#'   jsonlite::fromJSON), or already-parsed JSON: a list holding one JSON tree
#'   per compound. A single compound tree (a named list that has at least one
#'   non-list field) is also accepted and handled as a one-element list.
#' @param drop If TRUE and exactly one compound was processed, the entry
#'   itself is returned instead of a one-element list.
#' @return A list of PeakForestCompoundEntry objects, one per compound tree
#'   (or a single entry, see `drop`).
createPeakforestCompoundFromJSON <- function(contents, drop = FALSE) {

	# Character input is raw JSON text; anything else is used as parsed JSON.
	if (is.character(contents))
		contents <- jsonlite::fromJSON(contents, simplifyDataFrame = FALSE)

	# A lone compound object arrives as a named list of fields. Iterating over
	# it would treat every field as a compound, so wrap it first. A named list
	# whose elements are all lists is left alone (it may be a keyed collection
	# of compound trees).
	is_single_tree <- is.list(contents) &&
		!is.null(names(contents)) &&
		!all(vapply(contents, is.list, logical(1)))
	if (is_single_tree)
		contents <- list(contents)

	# Entry field name -> key to read in the JSON tree.
	jsonfields <- list()
	jsonfields[[BIODB.ACCESSION]] <- "id"
	jsonfields[[BIODB.PUBCHEMCOMP.ID]] <- "PubChemCID"
	jsonfields[[BIODB.CHEBI.ID]] <- "ChEBI"
	jsonfields[[BIODB.HMDB.ID]] <- "HMDB"
	jsonfields[[BIODB.KEGG.ID]] <- "KEGG"
	jsonfields[[BIODB.FORMULA]] <- "formula"
	jsonfields[[BIODB.SMILES]] <- "canSmiles"
	jsonfields[[BIODB.AVERAGE.MASS]] <- "averageMass"
	jsonfields[[BIODB.MONOISOTOPIC.MASS]] <- "monoisotopicMass"
	jsonfields[[BIODB.INCHI]] <- "inChI"
	# NOTE(review): "inchiIKey" looks like a possible misspelling of the JSON
	# key -- confirm against the PeakForest API before changing it.
	jsonfields[[BIODB.INCHIKEY]] <- "inchiIKey"
	jsonfields[[BIODB.NAME]] <- "mainName"

	entries <- vector(mode = "list", length = length(contents))

	for (i in seq_along(contents)) {

		jsontree <- contents[[i]]
		entry <- PeakForestCompoundEntry$new()

		for (field in names(jsonfields)) {
			# `[[` indexes with the *value* of the key; `jsontree$tosearch`
			# would look for an element literally named "tosearch".
			json_key <- jsonfields[[field]]
			entry$setField(field, jsontree[[json_key]])
		}

		entries[[i]] <- entry
	}

	if (drop && length(contents) == 1)
		entries <- entries[[1]]

	entries
}

#' Build PeakForest spectrum entries from JSON content.
#'
#' @param contents A character vector or list. When it holds exactly one
#'   character element, that element is parsed with jsonlite::fromJSON and the
#'   parsed result replaces `contents`. Otherwise each element is either a
#'   JSON text (parsed individually) or an already-parsed JSON tree.
#' @param drop If TRUE and exactly one item was processed, the entry itself is
#'   returned instead of a one-element list.
#' @param checkSub Not used by this function; kept for interface
#'   compatibility.
#' @return NULL when the single input element starts with "<html>"; otherwise
#'   a list of PeakForestSpectrumEntry objects, with NULL in the positions of
#'   items that start with "<html>" or are the text "null".
createPeakforestSpectraFromJSON <- function(contents, drop = FALSE, checkSub = TRUE) {

	entries <- vector(mode = "list", length = length(contents))

	# Entry field name -> key to read in the JSON tree.
	jsonfields <- character()
	jsonfields[[BIODB.ACCESSION]] <- "id"
	jsonfields[[BIODB.MSMODE]] <- "polarity"

	# Column names of the peak table, in the order the cells are built below.
	cnames <- c(BIODB.PEAK.MZ, BIODB.PEAK.RELATIVE.INTENSITY, BIODB.PEAK.FORMULA, BIODB.PEAK.MZTHEO, BIODB.PEAK.ERROR.PPM)

	# Coerce a JSON value, using NA when the key is absent so that every cell
	# of the peak table holds exactly one value.
	or_na <- function(value, coerce) {
		if (length(value) == 0L) coerce(NA) else coerce(value)
	}

	# A single character element is the whole JSON answer: parse it once.
	# Only test the prefix on character input, startsWith() fails otherwise.
	if (length(contents) == 1 && is.character(contents[[1]])) {
		if (startsWith(contents[[1]], "<html>"))
			return(NULL)
		contents <- jsonlite::fromJSON(contents[[1]], simplifyDataFrame = FALSE)
	}

	for (i in seq_along(contents)) {

		content <- contents[[i]]
		if (is.character(content)) {
			# Skip unusable items. The slot is left untouched (NULL from the
			# preallocation): assigning NULL with `[[<-` would delete the
			# element and shorten the list instead.
			if (startsWith(content, "<html>") || content == "null")
				next
			jsontree <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
		} else {
			jsontree <- content
		}

		entry <- PeakForestSpectrumEntry$new()

		# Mass analyzer description.
		entry$setField(BIODB.MSDEV, jsontree$analyzerMassSpectrometerDevice$instrumentName)
		entry$setField(BIODB.MSDEVTYPE, jsontree$analyzerMassSpectrometerDevice$ionAnalyzerType)

		for (field in names(jsonfields)) {
			# `[[` indexes with the *value* of the key; `jsontree$tosearch`
			# would look for an element literally named "tosearch".
			json_key <- jsonfields[[field]]
			entry$setField(field, jsontree[[json_key]])
		}

		######################
		# TREATING THE PEAKS #
		######################

		raw_peaks <- jsontree$peaks
		entry$setField(BIODB.NB.PEAKS, length(raw_peaks))

		# Default: empty table with the expected columns.
		peaks <- data.frame(matrix(0, ncol = length(cnames), nrow = 0))
		colnames(peaks) <- cnames

		if (length(raw_peaks) != 0) {
			# One list of five cells per peak.
			# NOTE(review): as.integer() truncates a fractional relative
			# intensity -- confirm this is intended.
			cells <- lapply(raw_peaks, function(pk) {
				list(
					or_na(pk$mz, as.double),
					or_na(pk$ri, as.integer),
					# Remove every whitespace from the formula.
					gsub(" ", "", trimws(or_na(pk$composition, as.character))),
					or_na(pk$theoricalMass, as.double),
					or_na(pk$deltaPPM, as.double)
				)
			})
			# List-mode matrix, one row per peak.
			# NOTE(review): this is a matrix while the empty case above is a
			# data.frame; kept as is because callers are not visible here.
			peaks <- matrix(unlist(cells, recursive = FALSE),
							nrow = length(cells), ncol = length(cnames), byrow = TRUE)
			colnames(peaks) <- cnames
		}

		entry$setField(BIODB.PEAKS, peaks)

		##################################
		# TREATING THE LIST OF COMPOUNDS #
		##################################

		entry$setField(BIODB.NB.COMPOUNDS, length(jsontree$listOfCompounds))
		compounds <- list()

		if (length(jsontree$listOfCompounds) != 0) {
			compounds <- lapply(jsontree$listOfCompounds, function(cpd) {
				# The compound factory iterates over its input, one JSON tree
				# per element, so a single tree must be wrapped in a list or
				# its fields would each be taken for a compound.
				if (is.list(cpd))
					createPeakforestCompoundFromJSON(list(cpd), drop = TRUE)
				else
					createPeakforestCompoundFromJSON(cpd)
			})
		}

		entry$setField(BIODB.COMPOUNDS, compounds)

		entries[[i]] <- entry
	}

	if (drop && length(contents) == 1)
		entries <- entries[[1]]

	entries
}


#### TODO: clean this up

#' Build reduced PeakForest spectrum entries (accession and peaks only).
#'
#' @param contents A character vector or list. When it holds exactly one
#'   character element, that element is parsed with jsonlite::fromJSON and the
#'   parsed result replaces `contents`. Otherwise each element is either a
#'   JSON text (parsed individually) or an already-parsed JSON tree.
#' @param drop If TRUE and exactly one item was processed, the entry itself is
#'   returned instead of a one-element list.
#' @param checkSub Not used by this function; kept for interface
#'   compatibility.
#' @return NULL when the single input element starts with "<html>"; otherwise
#'   a list of PeakForestSpectrumEntry objects, with NULL in the positions of
#'   items that start with "<html>" or are the text "null".
createReducedSpectraFromJSON <- function(contents,
			 drop = FALSE,
			 checkSub = TRUE) {
	entries <- vector(mode = "list", length = length(contents))

	# Column names of the peak table, in the order the cells are built below.
	cnames <-
		c(
			BIODB.PEAK.MZ,
			BIODB.PEAK.RELATIVE.INTENSITY,
			BIODB.PEAK.FORMULA,
			BIODB.PEAK.MZTHEO,
			BIODB.PEAK.ERROR.PPM
		)

	# Coerce a JSON value, using NA when the key is absent so that every cell
	# of the peak table holds exactly one value.
	or_na <- function(value, coerce) {
		if (length(value) == 0L) coerce(NA) else coerce(value)
	}

	# A single character element is the whole JSON answer: parse it once.
	# Only test the prefix on character input, startsWith() fails otherwise.
	if (length(contents) == 1 && is.character(contents[[1]])) {
		if (startsWith(contents[[1]], "<html>"))
			return(NULL)
		contents <- jsonlite::fromJSON(contents[[1]], simplifyDataFrame = FALSE)
	}

	for (i in seq_along(contents)) {
		content <- contents[[i]]
		if (is.character(content)) {
			# Skip unusable items. The slot is left untouched (NULL from the
			# preallocation): assigning NULL with `[[<-` would delete the
			# element and shorten the list instead.
			if (startsWith(content, "<html>") || content == "null")
				next
			jsontree <- jsonlite::fromJSON(content, simplifyDataFrame = FALSE)
		} else {
			jsontree <- content
		}

		entry <- PeakForestSpectrumEntry$new()
		entry$setField(BIODB.ACCESSION, jsontree$id)

		######################
		# TREATING THE PEAKS #
		######################

		raw_peaks <- jsontree$peaks
		entry$setField(BIODB.NB.PEAKS, length(raw_peaks))

		# Default: empty table with the expected columns.
		peaks <- data.frame(matrix(0, ncol = length(cnames), nrow = 0))
		colnames(peaks) <- cnames

		if (length(raw_peaks) != 0) {
			# One list of five cells per peak.
			# NOTE(review): as.integer() truncates a fractional relative
			# intensity -- confirm this is intended.
			cells <- lapply(raw_peaks, function(pk) {
				list(
					or_na(pk$mz, as.double),
					or_na(pk$ri, as.integer),
					# Remove every whitespace from the formula.
					gsub(" ", "", trimws(or_na(pk$composition, as.character))),
					or_na(pk$theoricalMass, as.double),
					or_na(pk$deltaPPM, as.double)
				)
			})
			# List-mode matrix (one row per peak) turned into a data.frame
			# whose columns are list columns.
			peaks <- as.data.frame(
				matrix(unlist(cells, recursive = FALSE),
					   nrow = length(cells), ncol = length(cnames), byrow = TRUE)
			)
			colnames(peaks) <- cnames
		}

		entry$setField(BIODB.PEAKS, peaks)

		entries[[i]] <- entry
	}

	if (drop && length(contents) == 1)
		entries <- entries[[1]]

	entries
}