Mercurial > repos > marie-tremblay-metatoul > 2dnmrannotation
changeset 4:cf11fa0c47c8 draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit b5f7f56b5ffc3c900236c077f72b321df20647be
author | workflow4metabolomics |
---|---|
date | Thu, 23 Jan 2025 15:28:44 +0000 |
parents | 546c7ccd2ed4 |
children | |
files | annotationRmn2D.R annotationRmn2DGlobale.R annotationRmn2DWrapper.R annotationRmn2D_xml.xml viridis.R |
diffstat | 5 files changed, 799 insertions(+), 707 deletions(-) [+] |
line wrap: on
line diff
--- a/annotationRmn2D.R Fri Feb 04 09:01:11 2022 +0000 +++ b/annotationRmn2D.R Thu Jan 23 15:28:44 2025 +0000 @@ -1,223 +1,229 @@ -########################################################################## -# ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE SEQUENCE RMN # -# matriceComplexe : data.frame liste couples ppm de la matrice a annoter # -# BdDStandards : objet contenant la base de donnees des composes standards # -# nom_sequence : nom sequence 2D a utiliser pour annotation ("JRES", "COSY", "TOCSY", "HMBC", "HSQC") # -# ppm1Tol : tolerance ppm axe abscisses # -# ppm2Tol : tolerance ppm axe ordonnees # -# nb_ligne_template : preciser le nombre total de ligne de la feuille de calcul a annoter # -####################################################################################################### -annotationRmn2D <- function(matriceComplexe, BdDStandards, nom_sequence, ppm1Tol = 0.01, ppm2Tol = 0.01, - seuil = 0, unicite = "NO") { - ## Longueur de la peak-list de la matrice a annoter - PeakListLength <- length(matriceComplexe[, 1]) - - ## Nombre de metabolites inclus dans BdD de composes standards - nbMetabolitesBdD <- length(BdDStandards) - matrixAnnotation <- data.frame() - allMetabolitesList <- data.frame() - seuil_score <- seuil - - ## Boucle sur les metabolites inclus dans BdD - for (i in seq_len(nbMetabolitesBdD)) { - ## Infos metabolite en cours - iMetabolite <- BdDStandards[[i]] - ppm1M <- iMetabolite[, 1] - ppm2M <- iMetabolite[, 2] - nbPeakMetabolite <- length(ppm1M) - MetaboliteName <- names(BdDStandards[i]) - - ## Initialisation - k <- 0 - presenceScore <- 0 - annotatedPpmRef <- data.frame() - annotatedPpmList <- data.frame() - annotatedPeakLength <- 0 - metabolites <- data.frame() - metabolitesList <- data.frame() - - ## Boucle sur les couples de pics de la matrice a annoter - for (p in seq_len(PeakListLength)) { - ppmAnnotationF1 <- as.numeric(matriceComplexe[p, 3]) - ppmAnnotationF2 <- as.numeric(matriceComplexe[p, 2]) - e <- simpleMessage("end of file") - tryCatch({ - if (!is.na(ppmAnnotationF1)) { - matrixAnnotation <- unique.data.frame(rbind.data.frame(matrixAnnotation, matriceComplexe[p, ])) - } - # Recherche du couple de pics de la matrice la liste des couples du metabolite standard - metaboliteIn <- (ppm1M >= (ppmAnnotationF2 - ppm1Tol) & ppm1M <= (ppmAnnotationF2 + ppm1Tol) & - ppm2M >= (ppmAnnotationF1 - ppm2Tol) & ppm2M <= (ppmAnnotationF1 + ppm2Tol)) - WhichMetaboliteIn <- which(metaboliteIn) - # Si au moins un couple de la matrice a annoter dans liste couples metabolite standard - if (length(WhichMetaboliteIn) > 0) { - for (a in seq_len(length(WhichMetaboliteIn))) { - annotatedPpmList <- data.frame(ppm1 = ppm1M[WhichMetaboliteIn[a]], ppm2 = ppm2M[WhichMetaboliteIn[a]], theoricalLength = nbPeakMetabolite) - annotatedPpmRef <- rbind(annotatedPpmRef, annotatedPpmList) - } - } - }, error = function(e) { - cat("End of file \n"); - }) - } - - # Au - 1 couple de ppm de la matrice complexe annote - if (nrow(annotatedPpmRef) >= 1) { - ## Nombre couples annotes - annotatedPeakLength <- nrow(annotatedPpmRef) - - ## Recherche doublons - annotatedDoublons <- duplicated(annotatedPpmRef) - if (sum(duplicated(annotatedPpmRef)) > 0) { - annotatedPeakLength <- nrow(annotatedPpmRef) - sum(duplicated(annotatedPpmRef)) - annotatedPpmRef <- annotatedPpmRef[-duplicated(annotatedPpmRef), ] - } - presenceScore <- round(annotatedPeakLength / nbPeakMetabolite, 2) - } - - ## Conservation metabolites dont score > seuil - if (presenceScore > seuil_score) { - metabolites <- data.frame(Metabolite = MetaboliteName, score = presenceScore) - metabolitesList <- cbind.data.frame(annotatedPpmRef, metabolites) - allMetabolitesList <- rbind.data.frame(allMetabolitesList, metabolitesList) - } - } - - # Initialisation - commonPpm <- data.frame() - commonPpmList <- data.frame() - metaboliteAdd <- data.frame() - metaboliteAddList <- data.frame() - commonMetabolitesList <- data.frame() - commonMetabolitesPpmList <- data.frame() - commonMetabolitesPpmAllList1 <- data.frame() - commonMetabolitesPpmAllList <- data.frame() - listeTotale_2D_unicite <- allMetabolitesList[, 1:4] - allMetabolitesList <- allMetabolitesList[, -3] - metabolitesAllUnicite <- data.frame() - - ## Boucle sur tous couples annotes - for (j in seq_len(length(allMetabolitesList$ppm1))) { - ## Boucle sur metabolites dans BdD composes standards - for (i in seq_len(nbMetabolitesBdD)) { - ppmMetaboliteBdD <- BdDStandards[[i]] - ppm1M <- ppmMetaboliteBdD[, 1] - ppm2M <- ppmMetaboliteBdD[, 2] - # Nombre de couples metabolite - nbPeakMetabolite <- length(ppm1M) - MetaboliteName <- names(BdDStandards[i]) - - metabolitesInAll <- (ppm1M >= (allMetabolitesList[j, 1] - ppm1Tol) & ppm1M <= (allMetabolitesList[j, 1] + ppm1Tol) & - ppm2M >= (allMetabolitesList[j, 2] - ppm2Tol) & ppm2M <= (allMetabolitesList[j, 2] + ppm2Tol)) - WhichMetabolitesInAll <- which(metabolitesInAll) - - if (MetaboliteName != allMetabolitesList[j, 3] & length(WhichMetabolitesInAll) > 0) { - metabolitesAllUnicite <- rbind.data.frame(metabolitesAllUnicite, listeTotale_2D_unicite[j, ]) - commonPpm <- data.frame(ppm1 = allMetabolitesList[j, 1], ppm2 = allMetabolitesList[j, 2]) - commonPpmList <- rbind.data.frame(commonPpmList, commonPpm) - commonPpmList <- unique(commonPpmList) - metaboliteAdd <- data.frame(nom_metabolite = MetaboliteName) - metaboliteAddList <- rbind.data.frame(metaboliteAddList, metaboliteAdd) - commonMetabolitesList <- rbind.data.frame(data.frame(nom_metabolite = allMetabolitesList[j, 3]), metaboliteAddList) - commonMetabolitesPpmList <- cbind.data.frame(commonPpm, commonMetabolitesList) - commonMetabolitesPpmAllList1 <- rbind.data.frame(commonMetabolitesPpmAllList1, commonMetabolitesPpmList) - commonMetabolitesPpmAllList1 <- unique.data.frame(commonMetabolitesPpmAllList1) - } - } - commonMetabolitesPpmAllList <- rbind.data.frame(commonMetabolitesPpmAllList, commonMetabolitesPpmAllList1) - commonMetabolitesPpmAllList <- unique.data.frame(commonMetabolitesPpmAllList) - - #initialisation des data.frame - commonPpm <- data.frame() - metaboliteAdd <- data.frame() - metaboliteAddList <- data.frame() - metabolite_ref <- data.frame() - commonMetabolitesList <- data.frame() - commonMetabolitesPpmList <- data.frame() - commonMetabolitesPpmAllList1 <- data.frame() - } - - unicityAllList <- listeTotale_2D_unicite - if (nrow(listeTotale_2D_unicite) != 0 & nrow(metabolitesAllUnicite) != 0) - unicityAllList <- setdiff(listeTotale_2D_unicite, metabolitesAllUnicite) - - unicitynbCouplesRectif <- data.frame() - for (g in seq_len(nrow(unicityAllList))) { - metaboliteUnicity <- (unicityAllList$Metabolite == unicityAllList$Metabolite[g]) - WhichMetaboliteUnicity <- which(metaboliteUnicity) - nb_occurence <- length(WhichMetaboliteUnicity) - unicitynbCouplesRectif <- rbind.data.frame(unicitynbCouplesRectif, nb_occurence) - } - names(unicitynbCouplesRectif) <- "NbCouplesAnnotes" - unicityAllList <- cbind.data.frame(unicityAllList, unicitynbCouplesRectif) - - unicityAllList <- cbind.data.frame(unicityAllList, score_unicite = unicityAllList$NbCouplesAnnotes / unicityAllList$theoricalLength) - unicityAllList <- unicityAllList[, -3] - unicityAllList <- unicityAllList[, -4] - - unicityAllList <- unicityAllList[unicityAllList$score_unicite > seuil_score, ] - - listeTotale_metabo <- data.frame() - if (nrow(commonPpmList) != 0) { - for (o in seq_len(length(commonPpmList[, 1]))) { - tf6 <- (commonMetabolitesPpmAllList$ppm1 == commonPpmList[o, 1] & commonMetabolitesPpmAllList$ppm2 == commonPpmList[o, 2]) - w6 <- which(tf6) - - for (s in seq_len(length(w6))) { - metaboliteAdd <- data.frame(nom_metabolite = commonMetabolitesPpmAllList[w6[s], 3]) - commonMetabolitesList <- paste(commonMetabolitesList, metaboliteAdd[1, ], sep = " ") - } - liste_metabo_ppm <- cbind.data.frame(ppm1 = commonPpmList[o, 1], ppm2 = commonPpmList[o, 2], commonMetabolitesList) - listeTotale_metabo <- rbind.data.frame(listeTotale_metabo, liste_metabo_ppm) - commonMetabolitesList <- data.frame() - } - } - - # Representation graphique - if (nom_sequence == "HSQC" | nom_sequence == "HMBC") { - atome <- "13C" - indice_positif <- 1 - indice_negatif <- -10 - } else { - atome <- "1H" - indice_positif <- 0.5 - indice_negatif <- -0.5 - } - - matriceComplexe <- matrixAnnotation - ppm1 <- as.numeric(matriceComplexe[, 2]) - ppm2 <- as.numeric(matriceComplexe[, 3]) - - if (unicite == "NO") { - listeTotale_2D_a_utiliser <- allMetabolitesList - d1.ppm <- allMetabolitesList$ppm1 - d2.ppm <- allMetabolitesList$ppm2 - } else { - listeTotale_2D_a_utiliser <- unicityAllList - d1.ppm <- listeTotale_2D_a_utiliser$ppm1 - d2.ppm <- listeTotale_2D_a_utiliser$ppm2 - } - - if (nrow(listeTotale_2D_a_utiliser) > 0) { - ## Taches de correlations - # Matrice biologique + Annotations - maxX <- max(round(max(as.numeric(matriceComplexe[, 2]))) + 0.5, round(max(as.numeric(matriceComplexe[, 2])))) - maxY <- max(round(max(as.numeric(matriceComplexe[, 3]))) + indice_positif, round(max(as.numeric(matriceComplexe[, 3])))) - probability.score <- as.factor(round(listeTotale_2D_a_utiliser[, 4], 2)) - lgr <- length(unique(probability.score)) - sp <- ggplot(matriceComplexe, aes(x = ppm1, y = ppm2)) - sp <- sp + geom_point(size = 2) + scale_x_reverse(breaks = seq(maxX, 0, -0.5)) + - scale_y_reverse(breaks = seq(maxY, 0, indice_negatif)) + - xlab("1H chemical shift (ppm)") + ylab(paste(atome, " chemical shift (ppm)")) + ggtitle(nom_sequence) + - geom_text(data = listeTotale_2D_a_utiliser, aes(d1.ppm, d2.ppm, label = str_to_lower(substr(listeTotale_2D_a_utiliser[, 3], 1, 3)), col = probability.score), - size = 4, hjust = 0, nudge_x = 0.02, vjust = 0, nudge_y = 0.2) + scale_colour_manual(values = viridis(lgr)) - print(sp) - } - - # Liste des resultats (couples pmm / metabolite / score) + liste ppms metabolites communs - if (unicite == "NO") { - return(list(liste_resultat = allMetabolitesList, listing_ppm_commun = listeTotale_metabo)) - } else { - return(list(liste_resultat_unicite = unicityAllList, listing_ppm_commun_affichage = listeTotale_metabo)) - } -} +########################################################################## +# ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE SEQUENCE RMN # +# matriceComplexe : data.frame liste couples ppm de la matrice a annoter # +# BdDStandards : objet contenant la base de donnees des composes standards # +# nom_sequence : nom sequence 2D a utiliser pour annotation ("JRES", "COSY", "TOCSY", "HMBC", "HSQC") # +# ppm1Tol : tolerance ppm axe abscisses # +# ppm2Tol : tolerance ppm axe ordonnees # +# nb_ligne_template : preciser le nombre total de ligne de la feuille de calcul a annoter # +####################################################################################################### +annotationRmn2D <- function(matriceComplexe, BdDStandards, nom_sequence, ppm1Tol = 0.01, ppm2Tol = 0.01, + seuil = 0, unicite = "NO") { + ## Longueur de la peak-list de la matrice a annoter + PeakListLength <- length(matriceComplexe[, 1]) + + ## Nombre de metabolites inclus dans BdD de composes standards + nbMetabolitesBdD <- length(BdDStandards) + matrixAnnotation <- data.frame() + allMetabolitesList <- data.frame() + seuil_score <- seuil + + ## Boucle sur les metabolites inclus dans BdD + for (i in seq_len(nbMetabolitesBdD)) { + ## Infos metabolite en cours + iMetabolite <- BdDStandards[[i]] + ppm1M <- iMetabolite[, 1] + ppm2M <- iMetabolite[, 2] + nbPeakMetabolite <- length(ppm1M) + MetaboliteName <- names(BdDStandards[i]) + + ## Initialisation + k <- 0 + presenceScore <- 0 + annotatedPpmRef <- data.frame() + annotatedPpmList <- data.frame() + annotatedPeakLength <- 0 + metabolites <- data.frame() + metabolitesList <- data.frame() + + ## Boucle sur les couples de pics de la matrice a annoter + for (p in seq_len(PeakListLength)) { + ppmAnnotationF1 <- as.numeric(matriceComplexe[p, 3]) + ppmAnnotationF2 <- as.numeric(matriceComplexe[p, 2]) + e <- simpleMessage("end of file") + tryCatch( + { + if (!is.na(ppmAnnotationF1)) { + matrixAnnotation <- unique.data.frame(rbind.data.frame(matrixAnnotation, matriceComplexe[p, ])) + } + # Recherche du couple de pics de la matrice la liste des couples du metabolite standard + metaboliteIn <- (ppm1M >= (ppmAnnotationF2 - ppm1Tol) & ppm1M <= (ppmAnnotationF2 + ppm1Tol) & + ppm2M >= (ppmAnnotationF1 - ppm2Tol) & ppm2M <= (ppmAnnotationF1 + ppm2Tol)) + WhichMetaboliteIn <- which(metaboliteIn) + # Si au moins un couple de la matrice a annoter dans liste couples metabolite standard + if (length(WhichMetaboliteIn) > 0) { + for (a in seq_len(length(WhichMetaboliteIn))) { + annotatedPpmList <- data.frame(ppm1 = ppm1M[WhichMetaboliteIn[a]], ppm2 = ppm2M[WhichMetaboliteIn[a]], theoricalLength = nbPeakMetabolite) + annotatedPpmRef <- rbind(annotatedPpmRef, annotatedPpmList) + } + } + }, + error = function(e) { + cat("End of file \n") + } + ) + } + + # Au - 1 couple de ppm de la matrice complexe annote + if (nrow(annotatedPpmRef) >= 1) { + ## Nombre couples annotes + annotatedPeakLength <- nrow(annotatedPpmRef) + + ## Recherche doublons + annotatedDoublons <- duplicated(annotatedPpmRef) + if (sum(duplicated(annotatedPpmRef)) > 0) { + annotatedPeakLength <- nrow(annotatedPpmRef) - sum(duplicated(annotatedPpmRef)) + annotatedPpmRef <- annotatedPpmRef[-duplicated(annotatedPpmRef), ] + } + presenceScore <- round(annotatedPeakLength / nbPeakMetabolite, 2) + } + + ## Conservation metabolites dont score > seuil + if (presenceScore > seuil_score) { + metabolites <- data.frame(Metabolite = MetaboliteName, score = presenceScore) + metabolitesList <- cbind.data.frame(annotatedPpmRef, metabolites) + allMetabolitesList <- rbind.data.frame(allMetabolitesList, metabolitesList) + } + } + + # Initialisation + commonPpm <- data.frame() + commonPpmList <- data.frame() + metaboliteAdd <- data.frame() + metaboliteAddList <- data.frame() + commonMetabolitesList <- data.frame() + commonMetabolitesPpmList <- data.frame() + commonMetabolitesPpmAllList1 <- data.frame() + commonMetabolitesPpmAllList <- data.frame() + listeTotale_2D_unicite <- allMetabolitesList[, 1:4] + allMetabolitesList <- allMetabolitesList[, -3] + metabolitesAllUnicite <- data.frame() + + ## Boucle sur tous couples annotes + for (j in seq_len(length(allMetabolitesList$ppm1))) { + ## Boucle sur metabolites dans BdD composes standards + for (i in seq_len(nbMetabolitesBdD)) { + ppmMetaboliteBdD <- BdDStandards[[i]] + ppm1M <- ppmMetaboliteBdD[, 1] + ppm2M <- ppmMetaboliteBdD[, 2] + # Nombre de couples metabolite + nbPeakMetabolite <- length(ppm1M) + MetaboliteName <- names(BdDStandards[i]) + + metabolitesInAll <- (ppm1M >= (allMetabolitesList[j, 1] - ppm1Tol) & ppm1M <= (allMetabolitesList[j, 1] + ppm1Tol) & + ppm2M >= (allMetabolitesList[j, 2] - ppm2Tol) & ppm2M <= (allMetabolitesList[j, 2] + ppm2Tol)) + WhichMetabolitesInAll <- which(metabolitesInAll) + + if (MetaboliteName != allMetabolitesList[j, 3] & length(WhichMetabolitesInAll) > 0) { + metabolitesAllUnicite <- rbind.data.frame(metabolitesAllUnicite, listeTotale_2D_unicite[j, ]) + commonPpm <- data.frame(ppm1 = allMetabolitesList[j, 1], ppm2 = allMetabolitesList[j, 2]) + commonPpmList <- rbind.data.frame(commonPpmList, commonPpm) + commonPpmList <- unique(commonPpmList) + metaboliteAdd <- data.frame(nom_metabolite = MetaboliteName) + metaboliteAddList <- rbind.data.frame(metaboliteAddList, metaboliteAdd) + commonMetabolitesList <- rbind.data.frame(data.frame(nom_metabolite = allMetabolitesList[j, 3]), metaboliteAddList) + commonMetabolitesPpmList <- cbind.data.frame(commonPpm, commonMetabolitesList) + commonMetabolitesPpmAllList1 <- rbind.data.frame(commonMetabolitesPpmAllList1, commonMetabolitesPpmList) + commonMetabolitesPpmAllList1 <- unique.data.frame(commonMetabolitesPpmAllList1) + } + } + commonMetabolitesPpmAllList <- rbind.data.frame(commonMetabolitesPpmAllList, commonMetabolitesPpmAllList1) + commonMetabolitesPpmAllList <- unique.data.frame(commonMetabolitesPpmAllList) + + # initialisation des data.frame + commonPpm <- data.frame() + metaboliteAdd <- data.frame() + metaboliteAddList <- data.frame() + metabolite_ref <- data.frame() + commonMetabolitesList <- data.frame() + commonMetabolitesPpmList <- data.frame() + commonMetabolitesPpmAllList1 <- data.frame() + } + + unicityAllList <- listeTotale_2D_unicite + if (nrow(listeTotale_2D_unicite) != 0 & nrow(metabolitesAllUnicite) != 0) { + unicityAllList <- setdiff(listeTotale_2D_unicite, metabolitesAllUnicite) + } + + unicitynbCouplesRectif <- data.frame() + for (g in seq_len(nrow(unicityAllList))) { + metaboliteUnicity <- (unicityAllList$Metabolite == unicityAllList$Metabolite[g]) + WhichMetaboliteUnicity <- which(metaboliteUnicity) + nb_occurence <- length(WhichMetaboliteUnicity) + unicitynbCouplesRectif <- rbind.data.frame(unicitynbCouplesRectif, nb_occurence) + } + names(unicitynbCouplesRectif) <- "NbCouplesAnnotes" + unicityAllList <- cbind.data.frame(unicityAllList, unicitynbCouplesRectif) + + unicityAllList <- cbind.data.frame(unicityAllList, score_unicite = unicityAllList$NbCouplesAnnotes / unicityAllList$theoricalLength) + unicityAllList <- unicityAllList[, -3] + unicityAllList <- unicityAllList[, -4] + + unicityAllList <- unicityAllList[unicityAllList$score_unicite > seuil_score, ] + + listeTotale_metabo <- data.frame() + if (nrow(commonPpmList) != 0) { + for (o in seq_len(length(commonPpmList[, 1]))) { + tf6 <- (commonMetabolitesPpmAllList$ppm1 == commonPpmList[o, 1] & commonMetabolitesPpmAllList$ppm2 == commonPpmList[o, 2]) + w6 <- which(tf6) + + for (s in seq_len(length(w6))) { + metaboliteAdd <- data.frame(nom_metabolite = commonMetabolitesPpmAllList[w6[s], 3]) + commonMetabolitesList <- paste(commonMetabolitesList, metaboliteAdd[1, ], sep = " ") + } + liste_metabo_ppm <- cbind.data.frame(ppm1 = commonPpmList[o, 1], ppm2 = commonPpmList[o, 2], commonMetabolitesList) + listeTotale_metabo <- rbind.data.frame(listeTotale_metabo, liste_metabo_ppm) + commonMetabolitesList <- data.frame() + } + } + + # Representation graphique + if (nom_sequence == "HSQC" | nom_sequence == "HMBC") { + atome <- "13C" + indice_positif <- 1 + indice_negatif <- -10 + } else { + atome <- "1H" + indice_positif <- 0.5 + indice_negatif <- -0.5 + } + + matriceComplexe <- matrixAnnotation + ppm1 <- as.numeric(matriceComplexe[, 2]) + ppm2 <- as.numeric(matriceComplexe[, 3]) + + if (unicite == "NO") { + listeTotale_2D_a_utiliser <- allMetabolitesList + d1.ppm <- allMetabolitesList$ppm1 + d2.ppm <- allMetabolitesList$ppm2 + } else { + listeTotale_2D_a_utiliser <- unicityAllList + d1.ppm <- listeTotale_2D_a_utiliser$ppm1 + d2.ppm <- listeTotale_2D_a_utiliser$ppm2 + } + + if (nrow(listeTotale_2D_a_utiliser) > 0) { + ## Taches de correlations + # Matrice biologique + Annotations + maxX <- max(round(max(as.numeric(matriceComplexe[, 2]))) + 0.5, round(max(as.numeric(matriceComplexe[, 2])))) + maxY <- max(round(max(as.numeric(matriceComplexe[, 3]))) + indice_positif, round(max(as.numeric(matriceComplexe[, 3])))) + probability.score <- as.factor(round(listeTotale_2D_a_utiliser[, 4], 2)) + lgr <- length(unique(probability.score)) + sp <- ggplot(matriceComplexe, aes(x = ppm1, y = ppm2)) + sp <- sp + geom_point(size = 2) + scale_x_reverse(breaks = seq(maxX, 0, -0.5)) + + scale_y_reverse(breaks = seq(maxY, 0, indice_negatif)) + + xlab("1H chemical shift (ppm)") + ylab(paste(atome, " chemical shift (ppm)")) + ggtitle(nom_sequence) + + geom_text( + data = listeTotale_2D_a_utiliser, aes(d1.ppm, d2.ppm, label = str_to_lower(substr(listeTotale_2D_a_utiliser[, 3], 1, 3)), col = probability.score), + size = 4, hjust = 0, nudge_x = 0.02, vjust = 0, nudge_y = 0.2 + ) + scale_colour_manual(values = viridis(lgr)) + print(sp) + } + + # Liste des resultats (couples pmm / metabolite / score) + liste ppms metabolites communs + if (unicite == "NO") { + return(list(liste_resultat = allMetabolitesList, listing_ppm_commun = listeTotale_metabo)) + } else { + return(list(liste_resultat_unicite = unicityAllList, listing_ppm_commun_affichage = listeTotale_metabo)) + } +}
--- a/annotationRmn2DGlobale.R Fri Feb 04 09:01:11 2022 +0000 +++ b/annotationRmn2DGlobale.R Thu Jan 23 15:28:44 2025 +0000 @@ -1,147 +1,158 @@ -################################################################################################### -# ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE (OU PLUSIEURS) SEQUENCE(s) # -# template : dataframe contenant la liste des couples de deplacements chimiques de la matrice complexe a annoter # -# cosy : 1 si sequence a utiliser / 0 sinon # -# hmbc : 1 si sequence a utiliser / 0 sinon # -# hsqc : 1 si sequence a utiliser / 0 sinon # -# jres : 1 si sequence a utiliser / 0 sinon # -# tocsy : 1 si sequence a utiliser / 0 sinon # -# tolPpm1 : tolerance autorisee autour de la valeur1 du couple de deplacements chimiques # -# tolPpm2HJRes : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si H dans dimension 2 # -# tolPpm2C : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si C dans dimension 2 # -# seuil : valeur du score de presence en dela de laquelle les metabolites annotes ne sont pas retenus # -# unicite : boolean pour ne retenir que les ... # -################################################################################################### -## CALCUL MOYENNE SANS VALEUR(S) MANQUANTE(S) -mean.rmNa <- function(x) { - mean(x, na.rm = TRUE) -} - -annotationRmn2DGlobale <- function(template, tolPpm1 = 0.01, tolPpm2HJRes = 0.002, tolPpm2C = 0.5, cosy = 1, hmbc = 1, hsqc = 1, jres = 1, tocsy = 1, seuil, unicite = "NO") { - ## Initialisation - options(max.print = 999999999) - annotationCOSY <- data.frame() - annotationHMBC <- data.frame() - annotationHSQC <- data.frame() - annotationJRES <- data.frame() - annotationTOCSY <- data.frame() - - dataCOSY <- "NA" - dataHMBC <- "NA" - dataHSQC <- "NA" - dataJRES <- "NA" - dataTOCSY <- "NA" - - ## Application seuil seulement si annotation avec 1 seule sequence - seuilPls2D <- seuil - - if (cosy == 1) { - matrice.cosy <- read.xlsx(template, sheet = "COSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") - matrice.cosy <- matrice.cosy[matrice.cosy$peak.index != "x", ] - annotationCOSY <- annotationRmn2D(matrice.cosy, BdDReference_COSY, "COSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite) - dataCOSY <- data.frame(Metabolite = str_to_lower(annotationCOSY$liste_resultat$Metabolite), score.COSY = annotationCOSY$liste_resultat$score) - dataCOSY <- unique.data.frame(dataCOSY) - } - - if (hmbc == 1) { - matrice.hmbc <- read.xlsx(template, sheet = "HMBC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") - matrice.hmbc <- matrice.hmbc[matrice.hmbc$peak.index != "x", ] - annotationHMBC <- annotationRmn2D(matrice.hmbc, BdDReference_HMBC, "HMBC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite) - dataHMBC <- data.frame(Metabolite = str_to_lower(annotationHMBC$liste_resultat$Metabolite), score.HMBC = annotationHMBC$liste_resultat$score) - dataHMBC <- unique.data.frame(dataHMBC) - } - - if (hsqc == 1) { - matrice.hsqc <- read.xlsx(template, sheet = "HSQC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") - matrice.hsqc <- matrice.hsqc[matrice.hsqc$peak.index != "x", ] - annotationHSQC <- annotationRmn2D(matrice.hsqc, BdDReference_HSQC, "HSQC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite) - dataHSQC <- data.frame(Metabolite = str_to_lower(annotationHSQC$liste_resultat$Metabolite), score.HSQC = annotationHSQC$liste_resultat$score) - dataHSQC <- unique.data.frame(dataHSQC) - } - - if (jres == 1) { - matrice.jres <- read.xlsx(template, sheet = "JRES", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") - matrice.jres <- matrice.jres[matrice.jres$peak.index != "x", ] - annotationJRES <- annotationRmn2D(matrice.jres, BdDReference_JRES, "JRES", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2HJRes, seuil = seuilPls2D, unicite = unicite) - dataJRES <- data.frame(Metabolite = str_to_lower(annotationJRES$liste_resultat$Metabolite), score.JRES = annotationJRES$liste_resultat$score) - dataJRES <- unique.data.frame(dataJRES) - } - - if (tocsy == 1) { - matrice.tocsy <- read.xlsx(template, sheet = "TOCSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") - matrice.tocsy <- matrice.tocsy[matrice.tocsy$peak.index != "x", ] - annotationTOCSY <- annotationRmn2D(matrice.tocsy, BdDReference_TOCSY, "TOCSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite) - dataTOCSY <- data.frame(Metabolite = str_to_lower(annotationTOCSY$liste_resultat$Metabolite), score.TOCSY = annotationTOCSY$liste_resultat$score) - dataTOCSY <- unique.data.frame(dataTOCSY) - } - - seqCombiMeanScoreSeuil <- data.frame() - seqCombiMeanScoreSeuilFiltre <- data.frame() - - ## CONCATENATION RESULTATS DIFFERENTES SEQUENCES - data2D <- list(dataCOSY, dataHMBC, dataHSQC, dataJRES, dataTOCSY) - whichSequenceNaN <- which((data2D != "NA")) - data2D <- data2D[whichSequenceNaN] - sequencesCombination <- data.frame(data2D[1]) - seqCombiMeanScore <- sequencesCombination - - ## Si une seule sequence et seuil sur score = filtre applique dans la fonction annotationRmn2D - if (length(data2D) >= 2) { - ## CONCATENATION SCORE PAR SEQUENCE - for (l in 2:length(data2D)) - sequencesCombination <- merge.data.frame(sequencesCombination, data2D[l], by = "Metabolite", all.x = TRUE, all.y = TRUE) - - ## Replacement of NA values due to mis annotation - for (m in seq_len(nrow(sequencesCombination))) { - COSYcompound <- sort(names(BdDReference_COSY)) - HMBCcompound <- sort(names(BdDReference_HMBC)) - HSQCcompound <- sort(names(BdDReference_HSQC)) - JREScompound <- sort(names(BdDReference_JRES)) - TOCSYcompound <- sort(names(BdDReference_TOCSY)) - - if (is.na(sequencesCombination[m, 2])) { - compound <- as.character(sequencesCombination[m, 1]) - for (c in seq_len(length(COSYcompound))) - if (str_to_lower(compound) == str_to_lower(COSYcompound[c])) - sequencesCombination[m, 2] <- 0 - } - - if (is.na(sequencesCombination[m, 3])) { - compound <- as.character(sequencesCombination[m, 1]) - for (c in seq_len(length(HMBCcompound))) - if (str_to_lower(compound) == str_to_lower(HMBCcompound[c])) - sequencesCombination[m, 3] <- 0 - } - - if (is.na(sequencesCombination[m, 4])) { - compound <- as.character(sequencesCombination[m, 1]) - for (c in seq_len(length(HSQCcompound))) - if (str_to_lower(compound) == str_to_lower(HSQCcompound[c])) - sequencesCombination[m, 4] <- 0 - } - - if (is.na(sequencesCombination[m, 5])) { - compound <- as.character(sequencesCombination[m, 1]) - for (c in seq_len(length(JREScompound))) - if (str_to_lower(compound) == str_to_lower(JREScompound[c])) - sequencesCombination[m, 5] <- 0 - } - - if (is.na(sequencesCombination[m, 6])) { - compound <- as.character(sequencesCombination[m, 1]) - for (c in seq_len(length(TOCSYcompound))) - if (str_to_lower(compound) == str_to_lower(TOCSYcompound[c])) - sequencesCombination[m, 6] <- 0 - } - } - - ## SCORE MOYEN (sans prise en compte valeurs manquantes) - meanScore <- round(apply(sequencesCombination[, -1], 1, FUN = mean.rmNa), 2) - seqCombiMeanScore <- cbind.data.frame(sequencesCombination, averageScore = meanScore) - - ## SUPPRESSION METABOLITE AVEC SCORE MOYEN < SEUIL - seqCombiMeanScoreSeuilFiltre <- seqCombiMeanScore[seqCombiMeanScore$averageScore > seuil, ] - } - - return(list(COSY = annotationCOSY, HMBC = annotationHMBC, HSQC = annotationHSQC, JRES = annotationJRES, TOCSY = annotationTOCSY, combination = seqCombiMeanScoreSeuilFiltre)) -} +################################################################################################### +# ANNOTATION SPECTRE 2D MATRICE COMPLEXE BASEE SUR UNE (OU PLUSIEURS) SEQUENCE(s) # +# template : dataframe contenant la liste des couples de deplacements chimiques de la matrice complexe a annoter # +# cosy : 1 si sequence a utiliser / 0 sinon # +# hmbc : 1 si sequence a utiliser / 0 sinon # +# hsqc : 1 si sequence a utiliser / 0 sinon # +# jres : 1 si sequence a utiliser / 0 sinon # +# tocsy : 1 si sequence a utiliser / 0 sinon # +# tolPpm1 : tolerance autorisee autour de la valeur1 du couple de deplacements chimiques # +# tolPpm2HJRes : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si H dans dimension 2 # +# tolPpm2C : tolerance autorisee autour de la valeur2 du couple de deplacements chimiques si C dans dimension 2 # +# seuil : valeur du score de presence en dela de laquelle les metabolites annotes ne sont pas retenus # +# unicite : boolean pour ne retenir que les ... # +################################################################################################### +## CALCUL MOYENNE SANS VALEUR(S) MANQUANTE(S) +mean.rmNa <- function(x) { + mean(x, na.rm = TRUE) +} + +annotationRmn2DGlobale <- function(template, tolPpm1 = 0.01, tolPpm2HJRes = 0.002, tolPpm2C = 0.5, cosy = 1, hmbc = 1, hsqc = 1, jres = 1, tocsy = 1, seuil, unicite = "NO") { + ## Initialisation + options(max.print = 999999999) + annotationCOSY <- data.frame() + annotationHMBC <- data.frame() + annotationHSQC <- data.frame() + annotationJRES <- data.frame() + annotationTOCSY <- data.frame() + + dataCOSY <- "NA" + dataHMBC <- "NA" + dataHSQC <- "NA" + dataJRES <- "NA" + dataTOCSY <- "NA" + + ## Application seuil seulement si annotation avec 1 seule sequence + seuilPls2D <- seuil + + if (cosy == 1) { + matrice.cosy <- read.xlsx(template, sheet = "COSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") + matrice.cosy <- matrice.cosy[matrice.cosy$peak.index != "x", ] + annotationCOSY <- annotationRmn2D(matrice.cosy, BdDReference_COSY, "COSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite) + dataCOSY <- data.frame(Metabolite = str_to_lower(annotationCOSY$liste_resultat$Metabolite), score.COSY = annotationCOSY$liste_resultat$score) + dataCOSY <- unique.data.frame(dataCOSY) + } + + if (hmbc == 1) { + matrice.hmbc <- read.xlsx(template, sheet = "HMBC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") + matrice.hmbc <- matrice.hmbc[matrice.hmbc$peak.index != "x", ] + annotationHMBC <- annotationRmn2D(matrice.hmbc, BdDReference_HMBC, "HMBC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite) + dataHMBC <- data.frame(Metabolite = str_to_lower(annotationHMBC$liste_resultat$Metabolite), score.HMBC = annotationHMBC$liste_resultat$score) + dataHMBC <- unique.data.frame(dataHMBC) + } + + if (hsqc == 1) { + matrice.hsqc <- read.xlsx(template, sheet = "HSQC", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") + matrice.hsqc <- matrice.hsqc[matrice.hsqc$peak.index != "x", ] + annotationHSQC <- annotationRmn2D(matrice.hsqc, BdDReference_HSQC, "HSQC", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2C, seuil = seuilPls2D, unicite = unicite) + dataHSQC <- data.frame(Metabolite = str_to_lower(annotationHSQC$liste_resultat$Metabolite), score.HSQC = annotationHSQC$liste_resultat$score) + dataHSQC <- unique.data.frame(dataHSQC) + } + + if (jres == 1) { + matrice.jres <- read.xlsx(template, sheet = "JRES", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") + matrice.jres <- matrice.jres[matrice.jres$peak.index != "x", ] + annotationJRES <- annotationRmn2D(matrice.jres, BdDReference_JRES, "JRES", ppm1Tol = tolPpm1, ppm2Tol = tolPpm2HJRes, seuil = seuilPls2D, unicite = unicite) + dataJRES <- data.frame(Metabolite = str_to_lower(annotationJRES$liste_resultat$Metabolite), score.JRES = annotationJRES$liste_resultat$score) + dataJRES <- unique.data.frame(dataJRES) + } + + if (tocsy == 1) { + matrice.tocsy <- read.xlsx(template, sheet = "TOCSY", startRow = 2, colNames = TRUE, rowNames = FALSE, cols = 1:3, na.strings = "NA") + matrice.tocsy <- matrice.tocsy[matrice.tocsy$peak.index != "x", ] + annotationTOCSY <- annotationRmn2D(matrice.tocsy, BdDReference_TOCSY, "TOCSY", ppm1Tol = tolPpm1, ppm2Tol = tolPpm1, seuil = seuilPls2D, unicite = unicite) + dataTOCSY <- data.frame(Metabolite = str_to_lower(annotationTOCSY$liste_resultat$Metabolite), score.TOCSY = annotationTOCSY$liste_resultat$score) + dataTOCSY <- unique.data.frame(dataTOCSY) + } + + seqCombiMeanScoreSeuil <- data.frame() + seqCombiMeanScoreSeuilFiltre <- data.frame() + + ## CONCATENATION RESULTATS DIFFERENTES SEQUENCES + data2D <- list(dataCOSY, dataHMBC, dataHSQC, dataJRES, dataTOCSY) + whichSequenceNaN <- which((data2D != "NA")) + data2D <- data2D[whichSequenceNaN] + sequencesCombination <- data.frame(data2D[1]) + seqCombiMeanScore <- sequencesCombination + + ## Si une seule sequence et seuil sur score = filtre applique dans la fonction annotationRmn2D + if (length(data2D) >= 2) { + ## CONCATENATION SCORE PAR SEQUENCE + for (l in 2:length(data2D)) { + sequencesCombination <- merge.data.frame(sequencesCombination, data2D[l], by = "Metabolite", all.x = TRUE, all.y = TRUE) + } + + ## Replacement of NA values due to mis annotation + for (m in seq_len(nrow(sequencesCombination))) { + COSYcompound <- sort(names(BdDReference_COSY)) + HMBCcompound <- sort(names(BdDReference_HMBC)) + HSQCcompound <- sort(names(BdDReference_HSQC)) + JREScompound <- sort(names(BdDReference_JRES)) + TOCSYcompound <- sort(names(BdDReference_TOCSY)) + + if (is.na(sequencesCombination[m, 2])) { + compound <- as.character(sequencesCombination[m, 1]) + for (c in seq_len(length(COSYcompound))) { + if (str_to_lower(compound) == str_to_lower(COSYcompound[c])) { + sequencesCombination[m, 2] <- 0 + } + } + } + + if (is.na(sequencesCombination[m, 3])) { + compound <- as.character(sequencesCombination[m, 1]) + for (c in seq_len(length(HMBCcompound))) { + if (str_to_lower(compound) == str_to_lower(HMBCcompound[c])) { + sequencesCombination[m, 3] <- 0 + } + } + } + + if (is.na(sequencesCombination[m, 4])) { + compound <- as.character(sequencesCombination[m, 1]) + for (c in seq_len(length(HSQCcompound))) { + if (str_to_lower(compound) == str_to_lower(HSQCcompound[c])) { + sequencesCombination[m, 4] <- 0 + } + } + } + + if (is.na(sequencesCombination[m, 5])) { + compound <- as.character(sequencesCombination[m, 1]) + for (c in seq_len(length(JREScompound))) { + if (str_to_lower(compound) == str_to_lower(JREScompound[c])) { + sequencesCombination[m, 5] <- 0 + } + } + } + + if (is.na(sequencesCombination[m, 6])) { + compound <- as.character(sequencesCombination[m, 1]) + for (c in seq_len(length(TOCSYcompound))) { + if (str_to_lower(compound) == str_to_lower(TOCSYcompound[c])) { + sequencesCombination[m, 6] <- 0 + } + } + } + } + + ## SCORE MOYEN (sans prise en compte valeurs manquantes) + meanScore <- round(apply(sequencesCombination[, -1], 1, FUN = mean.rmNa), 2) + seqCombiMeanScore <- cbind.data.frame(sequencesCombination, averageScore = meanScore) + + ## SUPPRESSION METABOLITE AVEC SCORE MOYEN < SEUIL + seqCombiMeanScoreSeuilFiltre <- seqCombiMeanScore[seqCombiMeanScore$averageScore > seuil, ] + } + + return(list(COSY = annotationCOSY, HMBC = annotationHMBC, HSQC = annotationHSQC, JRES = annotationJRES, TOCSY = annotationTOCSY, combination = seqCombiMeanScoreSeuilFiltre)) +}
--- a/annotationRmn2DWrapper.R Fri Feb 04 09:01:11 2022 +0000 +++ b/annotationRmn2DWrapper.R Thu Jan 23 15:28:44 2025 +0000 @@ -1,304 +1,354 @@ -#!/usr/local/public/bin/Rscript --vanilla --slave --no-site-file - -## 201919016 2DNmrAnnotation_1.0.0.R -## Marie Tremblay-Franco -## MetaboHUB: The French Infrastructure for Metabolomics and Fluxomics -## marie.tremblay-franco@inrae.fr - -runExampleL <- FALSE - -if (runExampleL) { -##------------------------------ -## Example of arguments -##------------------------------ -} - - -##------------------------------ -## Options -##------------------------------ -strAsFacL <- options()$stringsAsFactors -options(stringsAsFactors = FALSE) -options(digits = 8, scipen = 3) - -##------------------------------ -## Constants -##------------------------------ -topEnvC <- environment() -flagC <- "\n" - - -##------------------------- -## Input parameters reading -##------------------------- - -##------------------------------ -## R libraries laoding -##------------------------------ -library(batch) -library(dplyr) -library(ggplot2) -library(openxlsx) -library(stringr) -library(tidyr) -library(curl) -library(jsonlite) -library(stringi) - -if (!runExampleL) - argLs <- parseCommandArgs(evaluate = FALSE) -logFile <- argLs[["logOut"]] -sink(logFile) - -cat("\tPACKAGE INFO\n") -sessionInfo() - -##------------------------------ -## Functions -##------------------------------ -source_local <- function(fname) { - argv <- commandArgs(trailingOnly = FALSE) - base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) - source(paste(base_dir, fname, sep = "/")) -} -#Import the different functions -source_local("annotationRmn2D.R") -source_local("annotationRmn2DGlobale.R") -source_local("viridis.R") - -## Input parameter values -fileToAnnotate <- argLs[[1]] - # Constraints values -ph <- argLs$pH -field <- argLs$magneticField - - # Chosen sequence(s) -cosy <- 0 -hmbc <- 0 -hsqc <- 0 -jres <- 0 -tocsy <- 0 - -if (argLs$cosy_2dsequences == "yes") { - cosy <- 1 - peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=cosy&token=9131jq9l8gsjn1j14t351h716u&max=500")) - peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) - if (ph != 0) - peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] - if (field != 0) - peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] - - if (nrow(peakforestSpectra) != 0) { - BdDReference_COSY <- peakforestSpectra$peaks - names(BdDReference_COSY) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] - names(BdDReference_COSY) <- enc2utf8(names(BdDReference_COSY)) - names(BdDReference_COSY) <- str_replace_all(names(BdDReference_COSY), "\u00e9", "e") - - for (k in seq_len(length(BdDReference_COSY))) { - peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_COSY[[k]][, 2], ppm.dim2 = BdDReference_COSY[[k]][, 1], - BdDReference_COSY[[k]][, 3:ncol(BdDReference_COSY[[k]])]) - BdDReference_COSY[[k]] <- peakforestSpectra_df - } - } else { - stop("No COSY spectra correspond to requested pH and/or magnetic field", call. = FALSE) - } - rm(peakforestSpectra) - rm(peakforestSpectra_df) -} - -if (argLs$hmbc_2dsequences == "yes") { - hmbc <- 1 - peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=hmbc&token=9131jq9l8gsjn1j14t351h716u&max=500")) - peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) - if (ph != 0) - peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] - if (field != 0) - peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] - - if (nrow(peakforestSpectra) != 0) { - - BdDReference_HMBC <- peakforestSpectra$peaks - names(BdDReference_HMBC) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] - names(BdDReference_HMBC) <- enc2utf8(names(BdDReference_HMBC)) - names(BdDReference_HMBC) <- str_replace_all(names(BdDReference_HMBC), "\u00e9", "e") - - peakforestSpectra_df <- data.frame() - for (k in seq_len(length(BdDReference_HMBC))) { - peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_HMBC[[k]][, 2], ppm.dim2 = BdDReference_HMBC[[k]][, 1], - BdDReference_HMBC[[k]][, 3:ncol(BdDReference_HMBC[[k]])]) - BdDReference_HMBC[[k]] <- peakforestSpectra_df - } - } else { - stop("No HMBC spectra correspond to requested pH and/or magnetic field", call. = FALSE) - } - rm(peakforestSpectra) - rm(peakforestSpectra_df) -} - -if (argLs$hsqc_2dsequences == "yes") { - hsqc <- 1 - peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=hsqc&token=9131jq9l8gsjn1j14t351h716u&max=500")) - peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) - - if (ph != 0) - peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] - if (field != 0) - peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] - - if (nrow(peakforestSpectra) != 0) { - BdDReference_HSQC <- peakforestSpectra$peaks - names(BdDReference_HSQC) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] - names(BdDReference_HSQC) <- enc2utf8(names(BdDReference_HSQC)) - names(BdDReference_HSQC) <- str_replace_all(names(BdDReference_HSQC), "\u00e9", "e") - - for (k in seq_len(length(BdDReference_HSQC))) { - peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_HSQC[[k]][, 2], ppm.dim2 = BdDReference_HSQC[[k]][, 1], - BdDReference_HSQC[[k]][, 3:ncol(BdDReference_HSQC[[k]])]) - BdDReference_HSQC[[k]] <- peakforestSpectra_df - } - } else { - stop("No HSQC spectra correspond to requested pH and/or magnetic field", call. = FALSE) - } - rm(peakforestSpectra) - rm(peakforestSpectra_df) -} - -if (argLs$jres_2dsequences == "yes") { - jres <- 1 - peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=jres&token=9131jq9l8gsjn1j14t351h716u&max=500")) - peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) - - if (ph != 0) - peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] - if (field != 0) - peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] - - if (nrow(peakforestSpectra) != 0) { - BdDReference_JRES <- peakforestSpectra$peaks - names(BdDReference_JRES) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] - names(BdDReference_JRES) <- enc2utf8(names(BdDReference_JRES)) - names(BdDReference_JRES) <- str_replace_all(names(BdDReference_JRES), "\u00e9", "e") - - for (k in seq_len(length(BdDReference_JRES))) { - peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_JRES[[k]][, 2], ppm.dim2 = BdDReference_JRES[[k]][, 1], - BdDReference_JRES[[k]][, 3:ncol(BdDReference_JRES[[k]])]) - BdDReference_JRES[[k]] <- peakforestSpectra_df - } - } else { - stop("No JRES spectra correspond to requested pH and/or magnetic field", call. = FALSE) - } - rm(peakforestSpectra) - rm(peakforestSpectra_df) -} - -if (argLs$tocsy_2dsequences == "yes") { - tocsy <- 1 - peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=tocsy&token=9131jq9l8gsjn1j14t351h716u&max=500")) - peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) - - if (ph != 0) - peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] - if (field != 0) - peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] - - if (nrow(peakforestSpectra) != 0) { - BdDReference_TOCSY <- peakforestSpectra$peaks - names(BdDReference_TOCSY) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] - names(BdDReference_TOCSY) <- enc2utf8(names(BdDReference_TOCSY)) - names(BdDReference_TOCSY) <- str_replace_all(names(BdDReference_TOCSY), "\u00e9", "e") - - for (k in seq_len(length(BdDReference_TOCSY))) { - peakforestSpectra_df <- data.frame(ppm.dim1 = BdDReference_TOCSY[[k]][, 2], ppm.dim2 = BdDReference_TOCSY[[k]][, 1], - BdDReference_TOCSY[[k]][, 3:ncol(BdDReference_TOCSY[[k]])]) - BdDReference_TOCSY[[k]] <- peakforestSpectra_df - } - } else { - stop("No TOCSY spectra correspond to requested pH and/or magnetic field", call. = FALSE) - } - rm(peakforestSpectra) - rm(peakforestSpectra_df) -} - -if (argLs$cosy_2dsequences == "no" & argLs$hmbc_2dsequences == "no" & argLs$hsqc_2dsequences == "no" & argLs$jres_2dsequences == "no" & argLs$tocsy_2dsequences == "no") - stop("No chosen sequence. You have to choose at least 1 sequence", call. = FALSE) - - - # User database - - - # Allowed chemical shifts -tolPpm1 <- argLs$tolppm1 -tolPpm2HJRes <- argLs$tolppmJRES -tolPpm2C <- argLs$tolppm2 - # Threshold to remove metabolites (probability score < threshold) -seuil <- argLs$threshold -# Remove metabolites when multiple assignations? -unicite <- str_to_upper(argLs$unicity) - -## Output paramater values -AnnotationGraph <- argLs[["AnnotationGraph"]] - -print(argLs) - -## ANNOTATION -st0 <- Sys.time() -pdf(AnnotationGraph, onefile = TRUE) -annotationMelange <- annotationRmn2DGlobale(fileToAnnotate, tolPpm1 = tolPpm1, tolPpm2HJRes = tolPpm2HJRes, - tolPpm2C = tolPpm2C, cosy = cosy, hmbc = hmbc, hsqc = hsqc, - jres = jres, tocsy = tocsy, seuil = seuil, unicite = unicite) -dev.off() - -if (cosy == 1) { - write.table(annotationMelange$COSY$liste_resultat, file = argLs[["annotationCOSY"]], quote = FALSE, - row.names = FALSE, sep = "\t") - if (nrow(annotationMelange$COSY$listing_ppm_commun) != 0) - write.table(annotationMelange$COSY$listing_ppm_commun, file = argLs[["ppmCommunCOSY"]], quote = FALSE, - row.names = FALSE, sep = "\t") -} - -if (hmbc == 1) { - write.table(annotationMelange$HMBC$liste_resultat, file = argLs[["annotationHMBC"]], quote = FALSE, - row.names = FALSE, sep = "\t") - if (nrow(annotationMelange$HMBC$listing_ppm_commun) != 0) - write.table(annotationMelange$HMBC$listing_ppm_commun, file = argLs[["ppmCommunHMBC"]], quote = FALSE, - row.names = FALSE, sep = "\t") -} - -if (hsqc == 1) { - write.table(annotationMelange$HSQC$liste_resultat, file = argLs[["annotationHSQC"]], quote = FALSE, - row.names = FALSE, sep = "\t") - if (nrow(annotationMelange$HSQC$listing_ppm_commun) != 0) - write.table(annotationMelange$HSQC$listing_ppm_commun, file = argLs[["ppmCommunHSQC"]], quote = FALSE, - row.names = FALSE, sep = "\t") -} - -if (jres == 1) { - write.table(annotationMelange$JRES$liste_resultat, file = argLs[["annotationJRES"]], quote = FALSE, - row.names = FALSE, sep = "\t") - if (nrow(annotationMelange$JRES$listing_ppm_commun) != 0) - write.table(annotationMelange$JRES$listing_ppm_commun, file = argLs[["ppmCommunJRES"]], quote = FALSE, - row.names = FALSE, sep = "\t") -} - -if (tocsy == 1) { - write.table(annotationMelange$TOCSY$liste_resultat, file = argLs[["annotationTOCSY"]], quote = FALSE, - row.names = FALSE, sep = "\t") - if (nrow(annotationMelange$TOCSY$listing_ppm_commun) != 0) - write.table(annotationMelange$TOCSY$listing_ppm_commun, file = argLs[["ppmCommunTOCSY"]], quote = FALSE, - row.names = FALSE, sep = "\t") -} - -## Combinaison de sequences -if (cosy + jres + hmbc + hsqc + tocsy > 1) { - write.table(annotationMelange$combination, file = argLs[["annotationCombination"]], quote = FALSE, - row.names = FALSE, sep = "\t") -} -st1 <- Sys.time() -print(st1 - st0) - -## Ending -##-------- -cat("\nEnd of '2D NMR annotation' Galaxy module call: ", as.character(Sys.time()), sep = "") -sink() -options(stringsAsFactors = strAsFacL) -rm(list = ls()) +#!/usr/local/public/bin/Rscript --vanilla --slave --no-site-file + +## 201919016 2DNmrAnnotation_1.0.0.R +## Marie Tremblay-Franco +## MetaboHUB: The French Infrastructure for Metabolomics and Fluxomics +## marie.tremblay-franco@inrae.fr + +runExampleL <- FALSE + +if (runExampleL) { + ## ------------------------------ + ## Example of arguments + ## ------------------------------ +} + + +## ------------------------------ +## Options +## ------------------------------ +strAsFacL <- options()$stringsAsFactors +options(stringsAsFactors = FALSE) +options(digits = 8, scipen = 3) + +## ------------------------------ +## Constants +## ------------------------------ +topEnvC <- environment() +flagC <- "\n" + + +## ------------------------- +## Input parameters reading +## ------------------------- + +## ------------------------------ +## R libraries laoding +## ------------------------------ +library(batch) +library(dplyr) +library(ggplot2) +library(openxlsx) +library(stringr) +library(tidyr) +library(curl) +library(jsonlite) +library(stringi) + +if (!runExampleL) { + argLs <- parseCommandArgs(evaluate = FALSE) +} +logFile <- argLs[["logOut"]] +sink(logFile) + +cat("\tPACKAGE INFO\n") +sessionInfo() + +## ------------------------------ +## Functions +## ------------------------------ +source_local <- function(fname) { + argv <- commandArgs(trailingOnly = FALSE) + base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) + source(paste(base_dir, fname, sep = "/")) +} +# Import the different functions +source_local("annotationRmn2D.R") +source_local("annotationRmn2DGlobale.R") +source_local("viridis.R") + +## Input parameter values +fileToAnnotate <- argLs[[1]] +# Constraints values +ph <- argLs$pH +field <- argLs$magneticField + +# Chosen sequence(s) +cosy <- 0 +hmbc <- 0 +hsqc <- 0 +jres <- 0 +tocsy <- 0 + +if (argLs$cosy_2dsequences == "yes") { + cosy <- 1 + peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=cosy&token=9131jq9l8gsjn1j14t351h716u&max=500")) + peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) + if (ph != 0) { + peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] + } + if (field != 0) { + peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] + } + + if (nrow(peakforestSpectra) != 0) { + BdDReference_COSY <- peakforestSpectra$peaks + names(BdDReference_COSY) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] + names(BdDReference_COSY) <- enc2utf8(names(BdDReference_COSY)) + names(BdDReference_COSY) <- str_replace_all(names(BdDReference_COSY), "\u00e9", "e") + + for (k in seq_len(length(BdDReference_COSY))) { + peakforestSpectra_df <- data.frame( + ppm.dim1 = BdDReference_COSY[[k]][, 2], ppm.dim2 = BdDReference_COSY[[k]][, 1], + BdDReference_COSY[[k]][, 3:ncol(BdDReference_COSY[[k]])] + ) + BdDReference_COSY[[k]] <- peakforestSpectra_df + } + } else { + stop("No COSY spectra correspond to requested pH and/or magnetic field", call. = FALSE) + } + rm(peakforestSpectra) + rm(peakforestSpectra_df) +} + +if (argLs$hmbc_2dsequences == "yes") { + hmbc <- 1 + peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=hmbc&token=9131jq9l8gsjn1j14t351h716u&max=500")) + peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) + if (ph != 0) { + peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] + } + if (field != 0) { + peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] + } + + if (nrow(peakforestSpectra) != 0) { + BdDReference_HMBC <- peakforestSpectra$peaks + names(BdDReference_HMBC) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] + names(BdDReference_HMBC) <- enc2utf8(names(BdDReference_HMBC)) + names(BdDReference_HMBC) <- str_replace_all(names(BdDReference_HMBC), "\u00e9", "e") + + peakforestSpectra_df <- data.frame() + for (k in seq_len(length(BdDReference_HMBC))) { + peakforestSpectra_df <- data.frame( + ppm.dim1 = BdDReference_HMBC[[k]][, 2], ppm.dim2 = BdDReference_HMBC[[k]][, 1], + BdDReference_HMBC[[k]][, 3:ncol(BdDReference_HMBC[[k]])] + ) + BdDReference_HMBC[[k]] <- peakforestSpectra_df + } + } else { + stop("No HMBC spectra correspond to requested pH and/or magnetic field", call. = FALSE) + } + rm(peakforestSpectra) + rm(peakforestSpectra_df) +} + +if (argLs$hsqc_2dsequences == "yes") { + hsqc <- 1 + peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=hsqc&token=9131jq9l8gsjn1j14t351h716u&max=500")) + peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) + + if (ph != 0) { + peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] + } + if (field != 0) { + peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] + } + + if (nrow(peakforestSpectra) != 0) { + BdDReference_HSQC <- peakforestSpectra$peaks + names(BdDReference_HSQC) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] + names(BdDReference_HSQC) <- enc2utf8(names(BdDReference_HSQC)) + names(BdDReference_HSQC) <- str_replace_all(names(BdDReference_HSQC), "\u00e9", "e") + + for (k in seq_len(length(BdDReference_HSQC))) { + peakforestSpectra_df <- data.frame( + ppm.dim1 = BdDReference_HSQC[[k]][, 2], ppm.dim2 = BdDReference_HSQC[[k]][, 1], + BdDReference_HSQC[[k]][, 3:ncol(BdDReference_HSQC[[k]])] + ) + BdDReference_HSQC[[k]] <- peakforestSpectra_df + } + } else { + stop("No HSQC spectra correspond to requested pH and/or magnetic field", call. = FALSE) + } + rm(peakforestSpectra) + rm(peakforestSpectra_df) +} + +if (argLs$jres_2dsequences == "yes") { + jres <- 1 + peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=jres&token=9131jq9l8gsjn1j14t351h716u&max=500")) + peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) + + if (ph != 0) { + peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] + } + if (field != 0) { + peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] + } + + if (nrow(peakforestSpectra) != 0) { + BdDReference_JRES <- peakforestSpectra$peaks + names(BdDReference_JRES) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] + names(BdDReference_JRES) <- enc2utf8(names(BdDReference_JRES)) + names(BdDReference_JRES) <- str_replace_all(names(BdDReference_JRES), "\u00e9", "e") + + for (k in seq_len(length(BdDReference_JRES))) { + peakforestSpectra_df <- data.frame( + ppm.dim1 = BdDReference_JRES[[k]][, 2], ppm.dim2 = BdDReference_JRES[[k]][, 1], + BdDReference_JRES[[k]][, 3:ncol(BdDReference_JRES[[k]])] + ) + BdDReference_JRES[[k]] <- peakforestSpectra_df + } + } else { + stop("No JRES spectra correspond to requested pH and/or magnetic field", call. = FALSE) + } + rm(peakforestSpectra) + rm(peakforestSpectra_df) +} + +if (argLs$tocsy_2dsequences == "yes") { + tocsy <- 1 + peakforestSpectra <- readLines(curl("https://metabohub.peakforest.org/rest/v1/spectra/nmr2d/search?query=tocsy&token=9131jq9l8gsjn1j14t351h716u&max=500")) + peakforestSpectra <- fromJSON(peakforestSpectra, simplifyDataFrame = TRUE) + + if (ph != 0) { + peakforestSpectra <- peakforestSpectra[peakforestSpectra$sampleNMRTubeConditionsMetadata$potentiaHydrogenii == ph, ] + } + if (field != 0) { + peakforestSpectra <- peakforestSpectra[peakforestSpectra$analyzerNMRSpectrometerDevice$magneticFieldStrenght == field, ] + } + + if (nrow(peakforestSpectra) != 0) { + BdDReference_TOCSY <- peakforestSpectra$peaks + names(BdDReference_TOCSY) <- str_split(peakforestSpectra[, 2], simplify = TRUE, pattern = ";")[, 1] + names(BdDReference_TOCSY) <- enc2utf8(names(BdDReference_TOCSY)) + names(BdDReference_TOCSY) <- str_replace_all(names(BdDReference_TOCSY), "\u00e9", "e") + + for (k in seq_len(length(BdDReference_TOCSY))) { + peakforestSpectra_df <- data.frame( + ppm.dim1 = BdDReference_TOCSY[[k]][, 2], ppm.dim2 = BdDReference_TOCSY[[k]][, 1], + BdDReference_TOCSY[[k]][, 3:ncol(BdDReference_TOCSY[[k]])] + ) + BdDReference_TOCSY[[k]] <- peakforestSpectra_df + } + } else { + stop("No TOCSY spectra correspond to requested pH and/or magnetic field", call. = FALSE) + } + rm(peakforestSpectra) + rm(peakforestSpectra_df) +} + +if (argLs$cosy_2dsequences == "no" & argLs$hmbc_2dsequences == "no" & argLs$hsqc_2dsequences == "no" & argLs$jres_2dsequences == "no" & argLs$tocsy_2dsequences == "no") { + stop("No chosen sequence. You have to choose at least 1 sequence", call. = FALSE) +} + + +# User database + + +# Allowed chemical shifts +tolPpm1 <- argLs$tolppm1 +tolPpm2HJRes <- argLs$tolppmJRES +tolPpm2C <- argLs$tolppm2 +# Threshold to remove metabolites (probability score < threshold) +seuil <- argLs$threshold +# Remove metabolites when multiple assignations? +unicite <- str_to_upper(argLs$unicity) + +## Output paramater values +AnnotationGraph <- argLs[["AnnotationGraph"]] + +print(argLs) + +## ANNOTATION +st0 <- Sys.time() +pdf(AnnotationGraph, onefile = TRUE) +annotationMelange <- annotationRmn2DGlobale(fileToAnnotate, + tolPpm1 = tolPpm1, tolPpm2HJRes = tolPpm2HJRes, + tolPpm2C = tolPpm2C, cosy = cosy, hmbc = hmbc, hsqc = hsqc, + jres = jres, tocsy = tocsy, seuil = seuil, unicite = unicite +) +dev.off() + +if (cosy == 1) { + write.table(annotationMelange$COSY$liste_resultat, + file = argLs[["annotationCOSY"]], quote = FALSE, + row.names = FALSE, sep = "\t" + ) + if (nrow(annotationMelange$COSY$listing_ppm_commun) != 0) { + write.table(annotationMelange$COSY$listing_ppm_commun, + file = argLs[["ppmCommunCOSY"]], quote = FALSE, + row.names = FALSE, sep = "\t" + ) + } +} + +if (hmbc == 1) { + write.table(annotationMelange$HMBC$liste_resultat, + file = argLs[["annotationHMBC"]], quote = FALSE, + row.names = FALSE, sep = "\t" + ) + if (nrow(annotationMelange$HMBC$listing_ppm_commun) != 0) { + write.table(annotationMelange$HMBC$listing_ppm_commun, + file = argLs[["ppmCommunHMBC"]], quote = FALSE, + row.names = FALSE, sep = "\t" + ) + } +} + +if (hsqc == 1) { + write.table(annotationMelange$HSQC$liste_resultat, + file = argLs[["annotationHSQC"]], quote = FALSE, + row.names = FALSE, sep = "\t" + ) + if (nrow(annotationMelange$HSQC$listing_ppm_commun) != 0) { + write.table(annotationMelange$HSQC$listing_ppm_commun, + file = argLs[["ppmCommunHSQC"]], quote = FALSE, + row.names = FALSE, sep = "\t" + ) + } +} + +if (jres == 1) { + write.table(annotationMelange$JRES$liste_resultat, + file = argLs[["annotationJRES"]], quote = FALSE, + row.names = FALSE, sep = "\t" + ) + if (nrow(annotationMelange$JRES$listing_ppm_commun) != 0) { + write.table(annotationMelange$JRES$listing_ppm_commun, + file = argLs[["ppmCommunJRES"]], quote = FALSE, + row.names = FALSE, sep = "\t" + ) + } +} + +if (tocsy == 1) { + write.table(annotationMelange$TOCSY$liste_resultat, + file = argLs[["annotationTOCSY"]], quote = FALSE, + row.names = FALSE, sep = "\t" + ) + if (nrow(annotationMelange$TOCSY$listing_ppm_commun) != 0) { + write.table(annotationMelange$TOCSY$listing_ppm_commun, + file = argLs[["ppmCommunTOCSY"]], quote = FALSE, + row.names = FALSE, sep = "\t" + ) + } +} + +## Combinaison de sequences +if (cosy + jres + hmbc + hsqc + tocsy > 1) { + write.table(annotationMelange$combination, + file = argLs[["annotationCombination"]], quote = FALSE, + row.names = FALSE, sep = "\t" + ) +} +st1 <- Sys.time() +print(st1 - st0) + +## Ending +## -------- +cat("\nEnd of '2D NMR annotation' Galaxy module call: ", as.character(Sys.time()), sep = "") +sink() +options(stringsAsFactors = strAsFacL) +rm(list = ls())
--- a/annotationRmn2D_xml.xml Fri Feb 04 09:01:11 2022 +0000 +++ b/annotationRmn2D_xml.xml Thu Jan 23 15:28:44 2025 +0000 @@ -1,4 +1,4 @@ -<tool id="2DNmrAnnotation" name="2DNMRAnnotation" version="2.0.0" profile="20.09"> +<tool id="2DNmrAnnotation" name="2DNMRAnnotation" version="2.0.0+galaxy2" profile="20.09"> <description> Annotation of complex mixture bidimensional NMR spectra </description> @@ -11,14 +11,25 @@ <requirement type="package" version="1.0.2">r-tidyr</requirement> <requirement type="package" version="3.3">r-curl</requirement> <requirement type="package" version="1.6">r-jsonlite</requirement> - <requirement type="package">r-stringi</requirement> + <requirement type="package" version="1">r-stringi</requirement> </requirements> - + <required_files> + <include path="annotationRmn2DWrapper.R" /> + <include path="annotationRmn2D.R" /> + <include path="annotationRmn2DGlobale.R" /> + <include path="viridis.R" /> + <include path="BdDReference_COSY.RData" /> + <include path="BdDReference_HMBC.RData" /> + <include path="BdDReference_HSQC.RData" /> + <include path="BdDReference_JRES.RData" /> + <include path="BdDReference_NOESY.RData" /> + <include path="BdDReference_TOCSY.RData" /> + </required_files> <stdio> <exit_code range="1:" level="fatal" /> </stdio> - <command> + <command detect_errors="aggressive"><![CDATA[ ## Wrapper + Libraries of 2D-NMR sequences for reference compounds Rscript '$__tool_directory__/annotationRmn2DWrapper.R' @@ -86,7 +97,7 @@ ppmCommunTOCSY '$ppmCommunTOCSY' annotationCombination '$annotationCombination' AnnotationGraph '$AnnotationGraph' - </command> + ]]></command> <inputs> <param name="zip_xlsfile" type="data" format="xlsx" label="File to annotate in xlsx format" /> @@ -230,14 +241,16 @@ <data format="pdf" name="AnnotationGraph" label="${tool.name}_graph" /> </outputs> <tests> - <test expect_num_outputs="13"> + <test expect_num_outputs="13" expect_test_failure="true"> <param name="zip_xlsfile" value="Template_melange.xlsx" ftype="xlsx"/> <param name="cosy_2dsequences" value="yes"/> <param name="jres_2dsequences" value="yes"/> <param name="hmbc_2dsequences" value="yes"/> <param name="hsqc_2dsequences" value="yes"/> <param name="tocsy_2dsequences" value="yes"/> - <param name="inHouse_DB_choices.choice" value="no"/> + <section name="inHouse_DB_choices"> + <param name="choice" value="no"/> + </section> <param name="tolppm1" value="0.01"/> <param name="tolppm2" value="0.5"/> <param name="tolppmJRES" value="0.002"/>
--- a/viridis.R Fri Feb 04 09:01:11 2022 +0000 +++ b/viridis.R Thu Jan 23 15:28:44 2025 +0000 @@ -1,26 +1,38 @@ -viridis <- function(n, alpha = 1, begin = 0, end = 1, direction = 1, option = "D") { - if (begin < 0 | begin > 1 | end < 0 | end > 1) { - stop("begin and end must be in [0,1]") - } - if (abs(direction) != 1) { - stop("direction must be 1 or -1") - } - if (direction == -1) { - tmp <- begin - begin <- end - end <- tmp - } - option <- switch(EXPR = option, A = "A", magma = "A", - B = "B", inferno = "B", C = "C", plasma = "C", - D = "D", viridis = "D", E = "E", cividis = "E", { - warning(paste0("Option '", option, "' does not exist. Defaulting to 'viridis'.")) - "D" - }) - map <- viridisLite::viridis.map[viridisLite::viridis.map$opt == - option, ] - map_cols <- grDevices::rgb(map$R, map$G, map$B) - fn_cols <- grDevices::colorRamp(map_cols, space = "Lab", - interpolate = "spline") - cols <- fn_cols(seq(begin, end, length.out = n)) / 255 - grDevices::rgb(cols[, 1], cols[, 2], cols[, 3], alpha = alpha) -} +viridis <- function(n, alpha = 1, begin = 0, end = 1, direction = 1, option = "D") { + if (begin < 0 | begin > 1 | end < 0 | end > 1) { + stop("begin and end must be in [0,1]") + } + if (abs(direction) != 1) { + stop("direction must be 1 or -1") + } + if (direction == -1) { + tmp <- begin + begin <- end + end <- tmp + } + option <- switch(EXPR = option, + A = "A", + magma = "A", + B = "B", + inferno = "B", + C = "C", + plasma = "C", + D = "D", + viridis = "D", + E = "E", + cividis = "E", + { + warning(paste0("Option '", option, "' does not exist. Defaulting to 'viridis'.")) + "D" + } + ) + map <- viridisLite::viridis.map[viridisLite::viridis.map$opt == + option, ] + map_cols <- grDevices::rgb(map$R, map$G, map$B) + fn_cols <- grDevices::colorRamp(map_cols, + space = "Lab", + interpolate = "spline" + ) + cols <- fn_cols(seq(begin, end, length.out = n)) / 255 + grDevices::rgb(cols[, 1], cols[, 2], cols[, 3], alpha = alpha) +}