diff chem.R @ 0:e66bb061af06 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
author prog
date Tue, 12 Jul 2016 12:02:37 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chem.R	Tue Jul 12 12:02:37 2016 -0400
@@ -0,0 +1,119 @@
+if ( ! exists('load.sdf')) { # Do not load again if already loaded
+
+	#############
+	# CONSTANTS #
+	#############
+
+	R.LIB.CHEM.FILE.PATH <- parent.frame(2)$ofile # Reads `ofile` from the frame two levels up the call stack; presumably the path handed to source() when this file is sourced -- TODO confirm. Used by load.java.chem() to locate the java-chem directory.
+
+	CARBOXYL.GROUP <- "carboxyl" # Functional group name; not referenced elsewhere in this file -- presumably meant as the `group` argument of contains.substructure(), verify against callers.
+	
+	##################
+	# LOAD JAVA CHEM #
+	##################
+
+	# Start the JVM through rJava and put the java-chem classes on its class path.
+	#
+	# The class path is made of the dependency list printed by Maven's
+	# `dependency:build-classpath` goal, run on the `java-chem` directory found beside the
+	# directory that contains this file, plus the project's own `java-chem-1.0.jar`.
+	# Takes no argument and is called for its side effects on the JVM.
+	# Stops with an error if Maven prints nothing but log lines.
+	load.java.chem <- function() {
+		library(rJava)
+		.jinit()
+		.jcall('java/lang/System', 'S', 'setProperty', "rJava.debug", "1") # DEBUG/VERBOSE mode --> TODO does not work
+
+		java.chem.dir <- file.path(dirname(R.LIB.CHEM.FILE.PATH), '..', 'java-chem')
+
+		# The command goes through a shell: quote the directory so that a path with spaces or metacharacters survives.
+		cmd <- c("mvn", "-f", shQuote(java.chem.dir), "org.apache.maven.plugins:maven-dependency-plugin:2.10:build-classpath")
+		output <- system(paste(cmd, collapse = " "), intern = TRUE)
+
+		# Drop Maven log lines of any level; the first remaining line is the class path.
+		classpath <- grep("^\\[(INFO|WARNING|ERROR)]", output, invert = TRUE, value = TRUE)
+		if (length(classpath) == 0)
+			stop("Maven did not output any class path for \"", java.chem.dir, "\".")
+
+		# Entries are separated by the platform path separator (':' on Unix-alikes, ';' on Windows).
+		classpath <- strsplit(classpath[[1]], split = .Platform$path.sep, fixed = TRUE)[[1]]
+		.jaddClassPath(classpath)
+		.jaddClassPath(file.path(java.chem.dir, 'target', 'java-chem-1.0.jar'))
+	}
+
+	#############
+	# GET INCHI #
+	#############
+
+	# Return the string that the Java CdkHelper.getInchi() method yields for `mol`.
+	# mol: forwarded unchanged to Java -- presumably a molecule reference such as the ones in load.sdf()$struct; confirm against the Java side.
+	get.inchi <- function(mol) {
+		load.java.chem() # JVM and class path must be ready before the helper is instantiated
+		helper <- .jnew('org/openscience/chem/CdkHelper')
+		.jcall(helper, 'S', 'getInchi', mol)
+	}
+
+	#########################
+	# CONTAINS SUBSTRUCTURE #
+	#########################
+
+	# Ask the Java CdkHelper whether `inchi` contains the functional group `group`.
+	# The group name is upper-cased before being handed to containFunctionalGroup();
+	# the result is requested from Java as a boolean array ('[Z') and returned as is.
+	contains.substructure <- function(inchi, group) {
+		load.java.chem()
+		helper <- .jnew('org/openscience/chem/CdkHelper')
+		group.key <- toupper(group)
+		.jcall(helper, '[Z', 'containFunctionalGroup', inchi, group.key)
+	}
+
+	############
+	# LOAD SDF #
+	############
+
+	# Collect the "> <FIELD>" metadata items of an SDF file into a data frame:
+	# one row per molecule, one column per field name.
+	# Expects str_match() from stringr to be attached (load.sdf() does it).
+	.read.sdf.info <- function(path) {
+
+		info <- data.frame()
+
+		# Read file line by line. on.exit() releases the connection even when parsing stops on an error.
+		con <- file(path, open = "r")
+		on.exit(close(con), add = TRUE)
+
+		imol <- 1 # Index of molecule inside the file
+		field.name <- NA_character_ # Name of the field whose value is expected on the next line
+		repeat {
+
+			# Read one line
+			line <- readLines(con, n = 1)
+			if (length(line) == 0)
+				break
+
+			# Field value: only the line directly following the field header is stored
+			if ( ! is.na(field.name)) {
+				info[imol, field.name] <- line
+				field.name <- NA_character_
+				next
+			}
+
+			# Empty line
+			if (line == "")
+				next
+
+			# End of molecule
+			if (substring(line, 1, 4) == "$$$$") {
+				imol <- imol + 1
+				next
+			}
+
+			# Metadata field
+			g <- str_match(line, "^> <(.*)>$")
+			if ( ! is.na(g[1,2]))
+				field.name <- g[1,2]
+		}
+
+		info
+	}
+
+	# Load an SDF file.
+	# file:   path to the SDF file.
+	# silent: if TRUE, no warning is raised when the file does not exist.
+	# Returns NULL when the file does not exist. Otherwise returns a list with two elements:
+	#   struct: the array of IAtomContainer references returned by the Java CdkHelper.loadSdf() method,
+	#   info:   the data frame of metadata fields read from the file.
+	load.sdf <- function(file, silent = FALSE) {
+
+		library(stringr)
+
+		# Valid file ?
+		if ( ! file.exists(file)) {
+			if ( ! silent)
+				warning(paste0("SDF File \"", file, "\" does not exist."))
+			return(NULL)
+		}
+
+		# Metadata is parsed first; its connection is closed before Java opens the same file
+		info <- .read.sdf.info(file)
+
+		# Load molecule structures
+		load.java.chem()
+		cdkhlp <- .jnew('org/openscience/chem/CdkHelper')
+		struct <- .jcall(cdkhlp, '[Lorg/openscience/cdk/interfaces/IAtomContainer;', 'loadSdf', file)
+
+		list(struct = struct, info = info)
+	}
+
+} # end of load safe guard