diff annotationRmn2DWrapper.R @ 3:546c7ccd2ed4 draft default tip

"planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 911f4beba3dcb25c1033e8239426f8f763683523"
author workflow4metabolomics
date Fri, 04 Feb 2022 09:01:11 +0000
parents dff7bde22102
children
line wrap: on
line diff
--- a/annotationRmn2DWrapper.R	Tue Feb 04 10:59:26 2020 -0500
+++ b/annotationRmn2DWrapper.R	Fri Feb 04 09:01:11 2022 +0000
@@ -3,12 +3,11 @@
 ## 201919016 2DNmrAnnotation_1.0.0.R
 ## Marie Tremblay-Franco
 ## MetaboHUB: The French Infrastructure for Metabolomics and Fluxomics
-## www.metabohub.fr/en
-## marie.tremblay-franco@toulouse.inra.fr
+## marie.tremblay-franco@inrae.fr
 
 runExampleL <- FALSE
 
-if(runExampleL) {
+if (runExampleL) {
 ##------------------------------
 ## Example of arguments
 ##------------------------------
@@ -20,6 +19,7 @@
 ##------------------------------
 strAsFacL <- options()$stringsAsFactors
 options(stringsAsFactors = FALSE)
+options(digits = 8, scipen = 3)
 
 ##------------------------------
 ## Constants
@@ -41,9 +41,12 @@
 library(openxlsx)
 library(stringr)
 library(tidyr)
+library(curl)
+library(jsonlite)
+library(stringi)
 
-if(!runExampleL)
-    argLs <- parseCommandArgs(evaluate=FALSE)
+if (!runExampleL)
+    argLs <- parseCommandArgs(evaluate = FALSE)
 logFile <- argLs[["logOut"]]
 sink(logFile)
 
@@ -53,11 +56,10 @@
 ##------------------------------
 ## Functions
 ##------------------------------
-source_local <- function(fname)
-{
-	argv <- commandArgs(trailingOnly = FALSE)
-	base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
-	source(paste(base_dir, fname, sep="/"))
+source_local <- function(fname) {
+    argv <- commandArgs(trailingOnly = FALSE)
+    base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
+    source(paste(base_dir, fname, sep = "/"))
 }
 #Import the different functions
 source_local("annotationRmn2D.R")
@@ -66,78 +68,159 @@
 
 ## Input parameter values
 fileToAnnotate <- argLs[[1]]
+  # Constraints values
+ph <- argLs$pH
+field <- argLs$magneticField
+
   # Chosen sequence(s)
 cosy <- 0
 hmbc <- 0
 hsqc <- 0
 jres <- 0
 tocsy <- 0
-## sequences <- str_split(argLs[[2]], ",")[[1]]
-## for (s in 1:length(sequences))
-## {
-##   argv <- commandArgs(trailingOnly = FALSE)
-##   currentDir <- dirname(substring(argv[grep("--file=", argv)], 8))
-##   if (sequences[s]=="cosy"){
-##         cosy <- 1
-##         load(paste(currentDir, "BdDReference_COSY.RData", sep="/"))
-##   }else if(sequences[s]=="hmbc"){
-##         hmbc <- 1
-##         load(paste(currentDir, "BdDReference_HMBC.RData", sep="/"))
-##   }else if(sequences[s]=="hsqc"){
-##         hsqc <- 1
-##         load(paste(currentDir, "BdDReference_HSQC.RData", sep="/"))
-##   }else if(sequences[s]=="jres"){
-##         jres <- 1
-##         load(paste(currentDir, "BdDReference_JRES.RData", sep="/"))
-##   }else if(sequences[s]=="tocsy"){
-##         tocsy <- 1
-##         load(paste(currentDir, "BdDReference_TOCSY.RData", sep="/"))
-##   }else
-##     stop("No chosen sequence", call.=FALSE)
-## }
+
+if (argLs$cosy_2dsequences == "yes") {
+  cosy <- 1
+  peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=cosy&token=9131jq9l8gsjn1j14t351h716u&max=500"))
+  peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE)
+  if (ph != 0)
+    peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ]
+  if (field != 0)
+    peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ]
+
+  if (nrow(peakforestSpectra) != 0) {
+    BdDReference_COSY <- peakforestSpectra$peaks
+    names(BdDReference_COSY) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1]
+    names(BdDReference_COSY) <- enc2utf8(names(BdDReference_COSY))
+    names(BdDReference_COSY) <- str_replace_all(names(BdDReference_COSY), "\u00e9", "e")
+
+    for (k in seq_len(length(BdDReference_COSY))) {
+      peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_COSY[[k]][, 2], ppm.dim2 = BdDReference_COSY[[k]][, 1],
+                                         BdDReference_COSY[[k]][, 3:ncol(BdDReference_COSY[[k]])])
+      BdDReference_COSY[[k]] <- peakforestSpectra_df
+    }
+  } else {
+    stop("No COSY spectra correspond to requested pH and/or magnetic field", call. = FALSE)
+  }
+  rm(peakforestSpectra)
+  rm(peakforestSpectra_df)
+}
 
-if (argLs[[2]]=='yes')
-{
-  argv <- commandArgs(trailingOnly = FALSE)
-  currentDir <- dirname(substring(argv[grep("--file=", argv)], 8))
-  cosy <- 1
-  load(paste(currentDir, "BdDReference_COSY.RData", sep="/"))
+if (argLs$hmbc_2dsequences == "yes") {
+  hmbc <- 1
+  peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=hmbc&token=9131jq9l8gsjn1j14t351h716u&max=500"))
+  peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE)
+  if (ph != 0)
+    peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ]
+  if (field != 0)
+    peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ]
+
+  if (nrow(peakforestSpectra) != 0) {
+
+    BdDReference_HMBC <- peakforestSpectra$peaks
+    names(BdDReference_HMBC) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1]
+    names(BdDReference_HMBC) <- enc2utf8(names(BdDReference_HMBC))
+    names(BdDReference_HMBC) <- str_replace_all(names(BdDReference_HMBC), "\u00e9", "e")
+
+    peakforestSpectra_df <- data.frame()
+    for (k in seq_len(length(BdDReference_HMBC))) {
+      peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_HMBC[[k]][, 2], ppm.dim2 = BdDReference_HMBC[[k]][, 1],
+                                         BdDReference_HMBC[[k]][, 3:ncol(BdDReference_HMBC[[k]])])
+      BdDReference_HMBC[[k]] <- peakforestSpectra_df
+    }
+  } else {
+    stop("No HMBC spectra correspond to requested pH and/or magnetic field", call. = FALSE)
+  }
+  rm(peakforestSpectra)
+  rm(peakforestSpectra_df)
 }
 
-if (argLs[[3]]=='yes')
-{
-  argv <- commandArgs(trailingOnly = FALSE)
-  currentDir <- dirname(substring(argv[grep("--file=", argv)], 8))
-  jres <- 1
-  load(paste(currentDir, "BdDReference_JRES.RData", sep="/"))
-}
+if (argLs$hsqc_2dsequences == "yes") {
+  hsqc <- 1
+  peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=hsqc&token=9131jq9l8gsjn1j14t351h716u&max=500"))
+  peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE)
+
+  if (ph != 0)
+    peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ]
+  if (field != 0)
+    peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ]
 
-if (argLs[[4]]=='yes')
-{
-  argv <- commandArgs(trailingOnly = FALSE)
-  currentDir <- dirname(substring(argv[grep("--file=", argv)], 8))
-  hmbc <- 1
-  load(paste(currentDir, "BdDReference_HMBC.RData", sep="/"))
+  if (nrow(peakforestSpectra) != 0) {
+    BdDReference_HSQC <- peakforestSpectra$peaks
+    names(BdDReference_HSQC) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1]
+    names(BdDReference_HSQC) <- enc2utf8(names(BdDReference_HSQC))
+    names(BdDReference_HSQC) <- str_replace_all(names(BdDReference_HSQC), "\u00e9", "e")
+
+    for (k in seq_len(length(BdDReference_HSQC))) {
+      peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_HSQC[[k]][, 2], ppm.dim2 = BdDReference_HSQC[[k]][, 1],
+                                         BdDReference_HSQC[[k]][, 3:ncol(BdDReference_HSQC[[k]])])
+      BdDReference_HSQC[[k]] <- peakforestSpectra_df
+    }
+  } else {
+    stop("No HSQC spectra correspond to requested pH and/or magnetic field", call. = FALSE)
+  }
+  rm(peakforestSpectra)
+  rm(peakforestSpectra_df)
 }
 
-if (argLs[[5]]=='yes')
-{
-  argv <- commandArgs(trailingOnly = FALSE)
-  currentDir <- dirname(substring(argv[grep("--file=", argv)], 8))
-  hsqc <- 1
-  load(paste(currentDir, "BdDReference_HSQC.RData", sep="/"))
+if (argLs$jres_2dsequences == "yes") {
+  jres <- 1
+  peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=jres&token=9131jq9l8gsjn1j14t351h716u&max=500"))
+  peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE)
+
+  if (ph != 0)
+    peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ]
+  if (field != 0)
+    peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ]
+
+  if (nrow(peakforestSpectra) != 0) {
+    BdDReference_JRES <- peakforestSpectra$peaks
+    names(BdDReference_JRES) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1]
+    names(BdDReference_JRES) <- enc2utf8(names(BdDReference_JRES))
+    names(BdDReference_JRES) <- str_replace_all(names(BdDReference_JRES), "\u00e9", "e")
+
+    for (k in seq_len(length(BdDReference_JRES))) {
+      peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_JRES[[k]][, 2], ppm.dim2 = BdDReference_JRES[[k]][, 1],
+                                         BdDReference_JRES[[k]][, 3:ncol(BdDReference_JRES[[k]])])
+      BdDReference_JRES[[k]] <- peakforestSpectra_df
+    }
+  } else {
+    stop("No JRES spectra correspond to requested pH and/or magnetic field", call. = FALSE)
+  }
+  rm(peakforestSpectra)
+  rm(peakforestSpectra_df)
 }
 
-if (argLs[[6]]=='yes')
-{
-  argv <- commandArgs(trailingOnly = FALSE)
-  currentDir <- dirname(substring(argv[grep("--file=", argv)], 8))
+if (argLs$tocsy_2dsequences == "yes") {
   tocsy <- 1
-  load(paste(currentDir, "BdDReference_TOCSY.RData", sep="/"))
+  peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=tocsy&token=9131jq9l8gsjn1j14t351h716u&max=500"))
+  peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE)
+
+  if (ph != 0)
+    peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ]
+  if (field != 0)
+    peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ]
+
+  if (nrow(peakforestSpectra) != 0) {
+    BdDReference_TOCSY <- peakforestSpectra$peaks
+    names(BdDReference_TOCSY) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1]
+    names(BdDReference_TOCSY) <- enc2utf8(names(BdDReference_TOCSY))
+    names(BdDReference_TOCSY) <- str_replace_all(names(BdDReference_TOCSY), "\u00e9", "e")
+
+    for (k in seq_len(length(BdDReference_TOCSY))) {
+      peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_TOCSY[[k]][, 2], ppm.dim2 = BdDReference_TOCSY[[k]][, 1],
+                                         BdDReference_TOCSY[[k]][, 3:ncol(BdDReference_TOCSY[[k]])])
+      BdDReference_TOCSY[[k]] <- peakforestSpectra_df
+    }
+  } else {
+    stop("No TOCSY spectra correspond to requested pH and/or magnetic field", call. = FALSE)
+  }
+  rm(peakforestSpectra)
+  rm(peakforestSpectra_df)
 }
 
-if (argLs[[2]]=='no' & argLs[[3]]=='no' & argLs[[4]]=='no' & argLs[[5]]=='no' & argLs[[6]]=='no')
-  stop("No chosen sequence", call.=FALSE)
+if (argLs$cosy_2dsequences == "no" & argLs$hmbc_2dsequences == "no" & argLs$hsqc_2dsequences == "no" & argLs$jres_2dsequences == "no" & argLs$tocsy_2dsequences == "no")
+  stop("No chosen sequence. You have to choose at least 1 sequence", call. = FALSE)
 
 
   # User database
@@ -158,66 +241,60 @@
 print(argLs)
 
 ## ANNOTATION
-st0=Sys.time()
-pdf(AnnotationGraph,onefile=TRUE)
-annotationMelange <- annotationRmn2DGlobale(fileToAnnotate, tolPpm1=tolPpm1, tolPpm2HJRes=tolPpm2HJRes, 
-                                             tolPpm2C=tolPpm2C, cosy=cosy, hmbc=hmbc, hsqc=hsqc, 
-                                             jres=jres, tocsy=tocsy, seuil=seuil, unicite=unicite)
+st0 <- Sys.time()
+pdf(AnnotationGraph, onefile = TRUE)
+annotationMelange <- annotationRmn2DGlobale(fileToAnnotate, tolPpm1 = tolPpm1, tolPpm2HJRes = tolPpm2HJRes,
+                                             tolPpm2C = tolPpm2C, cosy = cosy, hmbc = hmbc, hsqc = hsqc,
+                                             jres = jres, tocsy = tocsy, seuil = seuil, unicite = unicite)
 dev.off()
 
-if (cosy==1)
-{
-  write.table(annotationMelange$COSY$liste_resultat, file=argLs[["annotationCOSY"]], quote=FALSE, 
-              row.names=FALSE,sep="\t")
+if (cosy == 1) {
+  write.table(annotationMelange$COSY$liste_resultat, file = argLs[["annotationCOSY"]], quote = FALSE,
+              row.names = FALSE, sep = "\t")
   if (nrow(annotationMelange$COSY$listing_ppm_commun) != 0)
-      write.table(annotationMelange$COSY$listing_ppm_commun, file=argLs[["ppmCommunCOSY"]], quote=FALSE, 
-                  row.names=FALSE,sep="\t")
+      write.table(annotationMelange$COSY$listing_ppm_commun, file = argLs[["ppmCommunCOSY"]], quote = FALSE,
+                  row.names = FALSE, sep = "\t")
 }
 
-if (hmbc==1)
-{
-  write.table(annotationMelange$HMBC$liste_resultat, file=argLs[["annotationHMBC"]], quote=FALSE, 
-              row.names=FALSE,sep="\t")
+if (hmbc == 1) {
+  write.table(annotationMelange$HMBC$liste_resultat, file = argLs[["annotationHMBC"]], quote = FALSE,
+              row.names = FALSE, sep = "\t")
   if (nrow(annotationMelange$HMBC$listing_ppm_commun) != 0)
-    write.table(annotationMelange$HMBC$listing_ppm_commun, file=argLs[["ppmCommunHMBC"]], quote=FALSE, 
-                row.names=FALSE,sep="\t")
+    write.table(annotationMelange$HMBC$listing_ppm_commun, file = argLs[["ppmCommunHMBC"]], quote = FALSE,
+                row.names = FALSE, sep = "\t")
 }
 
-if (hsqc==1)
-{
-  write.table(annotationMelange$HSQC$liste_resultat, file=argLs[["annotationHSQC"]], quote=FALSE, 
-              row.names=FALSE,sep="\t")
+if (hsqc == 1) {
+  write.table(annotationMelange$HSQC$liste_resultat, file = argLs[["annotationHSQC"]], quote = FALSE,
+              row.names = FALSE, sep = "\t")
   if (nrow(annotationMelange$HSQC$listing_ppm_commun) != 0)
-    write.table(annotationMelange$HSQC$listing_ppm_commun, file=argLs[["ppmCommunHSQC"]], quote=FALSE, 
-                row.names=FALSE,sep="\t")
+    write.table(annotationMelange$HSQC$listing_ppm_commun, file = argLs[["ppmCommunHSQC"]], quote = FALSE,
+                row.names = FALSE, sep = "\t")
 }
 
-if (jres==1)
-{
-  write.table(annotationMelange$JRES$liste_resultat, file=argLs[["annotationJRES"]], quote=FALSE, 
-              row.names=FALSE,sep="\t")
+if (jres == 1) {
+  write.table(annotationMelange$JRES$liste_resultat, file = argLs[["annotationJRES"]], quote = FALSE,
+              row.names = FALSE, sep = "\t")
   if (nrow(annotationMelange$JRES$listing_ppm_commun) != 0)
-    write.table(annotationMelange$JRES$listing_ppm_commun, file=argLs[["ppmCommunJRES"]], quote=FALSE, 
-                row.names=FALSE,sep="\t")
+    write.table(annotationMelange$JRES$listing_ppm_commun, file = argLs[["ppmCommunJRES"]], quote = FALSE,
+                row.names = FALSE, sep = "\t")
 }
 
-if (tocsy==1)
-{
-  write.table(annotationMelange$TOCSY$liste_resultat, file=argLs[["annotationTOCSY"]], quote=FALSE, 
-              row.names=FALSE,sep="\t")
+if (tocsy == 1) {
+  write.table(annotationMelange$TOCSY$liste_resultat, file = argLs[["annotationTOCSY"]], quote = FALSE,
+              row.names = FALSE, sep = "\t")
   if (nrow(annotationMelange$TOCSY$listing_ppm_commun) != 0)
-    write.table(annotationMelange$TOCSY$listing_ppm_commun, file=argLs[["ppmCommunTOCSY"]], quote=FALSE, 
-                row.names=FALSE,sep="\t")
+    write.table(annotationMelange$TOCSY$listing_ppm_commun, file = argLs[["ppmCommunTOCSY"]], quote = FALSE,
+                row.names = FALSE, sep = "\t")
 }
 
 ## Combinaison de sequences
-if (cosy + jres + hmbc + hsqc + tocsy > 1)
-{
-  write.table(annotationMelange$combination, file=argLs[["annotationCombination"]], quote=FALSE, 
-              row.names=FALSE,sep="\t")
+if (cosy + jres + hmbc + hsqc + tocsy > 1) {
+  write.table(annotationMelange$combination, file = argLs[["annotationCombination"]], quote = FALSE,
+              row.names = FALSE, sep = "\t")
 }
-st1=Sys.time()
-print(st1-st0)
+st1 <- Sys.time()
+print(st1 - st0)
 
 ## Ending
 ##--------