Mercurial > repos > marie-tremblay-metatoul > nmr_annotation
changeset 0:a8e85c07dc7b draft
Uploaded
author | marie-tremblay-metatoul |
---|---|
date | Tue, 30 Jan 2018 05:37:10 -0500 |
parents | |
children | b55559a2854f |
files | nmr_annotation/.shed nmr_annotation/DrawSpec.R nmr_annotation/Library.Rdata nmr_annotation/ReadMe.txt nmr_annotation/asics_wrapper.R nmr_annotation/asics_xml.xml nmr_annotation/static/images/NmrAnnotation_ASICS.png nmr_annotation/test-data/ADG_007.zip nmr_annotation/test-data/MTBLS1_177_NMR_Annotation.pdf nmr_annotation/test-data/MTBLS1_177_NMR_Annotation_proportionEstimation.tabular |
diffstat | 10 files changed, 559 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nmr_annotation/.shed Tue Jan 30 05:37:10 2018 -0500 @@ -0,0 +1,7 @@ +categories: [Metabolomics] +description: '[Metabolomics][W4M][NMR] NMR Annotation - Annotation of complex mixture NMR spectra and metabolite proportion estimation' +homepage_url: http://workflow4metabolomics.org +long_description: 'Part of the W4M project: http://workflow4metabolomics.org' +name: nmr_annotation +owner: marie-tremblay-metatoul +remote_repository_url: https://github.com/workflow4metabolomics/nmr_annotation
# DrawSpec.R

# Draw one or several spectra on the current graphics device.
#
# Arguments:
#   X          table of intensities, one spectrum per row and one variable per
#              column; coerced with as.data.frame() then as.matrix(). Column
#              names are used as x-axis tick labels.
#   startP     index of the first column to draw (-1: first column).
#   endP       index of the last column to draw (-1: last column).
#   groupLabel optional grouping of the rows (one value per row). Each group
#              gets its own colour and a legend is added. NULL: every row is
#              its own group and no legend is drawn.
#   useLog     any value other than -1 draws log(intensity).
#   highBound  any value other than -1 caps intensities at this value.
#   lowBound   any value other than -1 raises intensities to at least this value.
#   xlab, ylab, main  axis labels and title (NULL: defaults built below).
#   nAxisPos   number of intervals between x-axis tick labels.
#   offside    offset added to the column index used for the title and for
#              picking the tick labels.
#
# Value: NULL, invisibly. Called for its side effect (a plot).
drawSpec <- function (X, startP = -1, endP = -1, groupLabel = NULL, useLog = -1, highBound = -1, lowBound = -1,
                      xlab = NULL, ylab = NULL, main = NULL, nAxisPos = 4, offside = 0)
{
  X <- as.matrix(as.data.frame(X))

  # Nothing to draw for an empty table.
  if (nrow(X) == 0L || ncol(X) == 0L)
    return(invisible(NULL))

  # Clamp intensities. which() drops NA comparisons, so missing values are
  # left untouched.
  if (highBound != -1)
    X[which(X > highBound)] <- highBound
  if (lowBound != -1)
    X[which(X < lowBound)] <- lowBound

  # Accept any vector as groupLabel, not only factors.
  show_legend <- !is.null(groupLabel)
  if (show_legend) {
    groupLabel <- as.factor(groupLabel)
  } else {
    groupLabel <- as.factor(seq_len(nrow(X)))
  }
  groups <- levels(groupLabel)

  if (startP == -1)
    startP <- 1
  if (endP == -1)
    endP <- ncol(X)
  if (is.null(xlab))
    xlab <- "index"
  if (is.null(ylab))
    ylab <- "intensity"
  if (is.null(main))
    main <- paste(" ", startP + offside, "-", endP + offside)

  GraphRange <- startP:endP
  # drop = FALSE keeps a matrix even for one row, so the number of spectra is
  # read from nrow() instead of being guessed from length(yn) vs ncol(X).
  yn <- X[, GraphRange, drop = FALSE]
  if (useLog != -1)
    yn <- log(yn)

  # Empty frame first, spectra are added with lines() afterwards.
  plot(yn[1, ], ylim = c(min(yn), max(yn)), type = "n", ylab = ylab, xlab = xlab, main = main, xaxt = "n")

  # Tick labels are taken from the column names; positions whose column index
  # falls outside the table have no label and are skipped.
  tempVal <- trunc(length(GraphRange) / nAxisPos)
  xPos <- (0:nAxisPos) * tempVal
  tick_labels <- colnames(X)[xPos + startP + offside]
  labelled <- !is.na(tick_labels)
  if (any(labelled))
    axis(1, at = xPos[labelled], labels = tick_labels[labelled])

  # Single spectrum: plain line, no grouping and no legend.
  if (nrow(yn) == 1L) {
    lines(yn[1, ])
    return(invisible(NULL))
  }

  # One colour per group, used for both the lines and the legend. The global
  # palette is deliberately not modified: palette(value) returns the previous
  # palette, not the new one.
  group_colors <- rainbow(length(groups))
  for (i in seq_along(groups)) {
    for (spectrum in which(groupLabel == groups[i])) {
      lines(yn[spectrum, ], col = group_colors[i])
    }
  }
  if (show_legend) {
    legend("topleft", groups, col = group_colors, text.col = "black", pch = c(19, 19), bg = "gray90")
  }
  invisible(NULL)
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nmr_annotation/ReadMe.txt Tue Jan 30 05:37:10 2018 -0500 @@ -0,0 +1,35 @@ +Instructions to integrate the "NMR Annotation" tool into a local instance of Galaxy +Version December 2017 M Tremblay-Franco + + +## --- R bin and Packages : --- ## +R version 3.0.2 (2013-09-25) -- "Frisbee Sailing" +Platform: x86_64-redhat-linux-gnu (64-bit) + +Install the "batch" library, necessary for the parseCommandArgs function, and the "ASICS" library, necessary for NMR spectra annotation: + - Download package source (*.tar.gz file) from your favorite CRAN (http://www.r-project.org/) +For example: http://cran.univ-lyon1.fr/ + + - Install package in your R session +install.packages("path/package_name.tar.gz",lib="path",repos=NULL) +For example: install.packages("/usr/lib64/R/library/batch_1.1-4.tar.gz",lib="/usr/lib64/R/library",repos=NULL) + + - Finally, load the packages into your R session +library(batch) +library(ASICS) + + +## --- Config : --- ## + - Edit the file "/galaxy/dist/galaxy-dist/tool_conf.xml" and add +<section id="id_name" name="Name"> + <tool file="path/asics_xml.xml" /> +</section> +to create a new section containing the asics_xml tool +or add + <tool file="path/asics_xml.xml" /> +in an existing section + + - Put the two files asics_xml.xml and asics_wrapper.R, all the needed R functions and the Library.Rdata file (including compound reference spectra) in the same directory +For example, path=/galaxy/dist/galaxy-dist/tools/annotation + +Finally, restart Galaxy \ No newline at end of file
#!/usr/local/public/bin/Rscript --vanilla --slave --no-site-file

## 29122017_asics_wrapper.R
## Remi Servien, Patrick Tardivel, Marie Tremblay-Franco and Gaelle Lefort
## marie.tremblay-franco@inra.fr

runExampleL <- FALSE

##------------------------------
## Options
##------------------------------
# The previous value is kept so it can be restored at the end of the script
strAsFacL <- getOption("stringsAsFactors")
options(stringsAsFactors=FALSE)


##------------------------------
## Libraries loading
##------------------------------
# ParseCommandArgs function
library(batch)
library(ASICS)



# R script call
# Source an R file located in the same directory as this wrapper.
#   fname: file name relative to the wrapper's directory.
# The wrapper's directory is taken from the "--file=" argument that Rscript
# puts on the command line. Without it (e.g. when the file is source()d from
# an interactive session) the location cannot be determined, so the function
# stops with an explicit message instead of trying to read "/<fname>".
source_local <- function(fname)
{
  argv <- commandArgs(trailingOnly=FALSE)
  script_arg <- grep("^--file=", argv, value=TRUE)
  if (length(script_arg) == 0)
    stop("Cannot locate the wrapper directory: no '--file=' argument found (run this script with Rscript)", call.=FALSE)
  base_dir <- dirname(sub("^--file=", "", script_arg[1]))
  source(file.path(base_dir, fname))
}
#Import the different functions
source_local("DrawSpec.R")


##------------------------------
## Errors ?????????????????????
##------------------------------


##------------------------------
## Constants
##------------------------------
topEnvC <- environment()
flagC <- "\n"


##------------------------------
## Helpers
##------------------------------
# Collect the borders of every repeated zone of one kind.
#   args:     named list of command-line arguments
#   leftName: name of the "left border" argument
# Returns a list with one numeric c(left, right) per zone (empty list when the
# argument is absent). Relies on each left border being immediately followed
# by its right border on the command line, as written by asics_xml.xml.
collectZoneBorders <- function(args, leftName)
{
  lapply(which(names(args) == leftName),
         function(i) as.numeric(c(args[[i]], args[[i + 1]])))
}

# Index of the grid point closest to a requested chemical shift. Avoids exact
# '==' comparisons on floating-point ppm values, which return no match when
# the requested border is not exactly on the grid.
nearestIndex <- function(ppmGrid, target)
{
  which.min(abs(ppmGrid - target))
}

# Evenly spread tick positions (as indices in 1..n) for nIntervals intervals.
tickIndices <- function(n, nIntervals)
{
  unique(round(seq(1, n, length.out=nIntervals + 1)))
}


##------------------------------
## Script
##------------------------------
if(!runExampleL)
    argLs <- parseCommandArgs(evaluate=FALSE)

# Standards loading
# NOTE(review): the loaded objects are not referenced below and ASICS() is
# called with library.metabolites=NULL -- confirm whether this file is needed.
load(argLs[["standards"]])

## Parameters Loading
##-------------------
# Inputs
## Spectrum to annotate
zipfile <- argLs[["zipfile"]]
directory <- unzip(zipfile, list=FALSE)
# Second path component of the first extracted entry.
# NOTE(review): this assumes entries look like "./<sample_dir>/..." -- confirm
# against the archives actually produced for this tool.
directory <- paste(getwd(), strsplit(directory[1], "/")[[1]][2], sep="/")


##Exclusion zone(s)
exclusionZonesList <- collectZoneBorders(argLs, "zone_exclusion_left")
if (length(exclusionZonesList) > 0) {
  # One row per zone: (left border, right border)
  exclusionZonesBorders <- do.call(rbind, exclusionZonesList)
} else {
  # matrix(NULL, ncol=2) is an error, so "no exclusion zone" needs an explicit
  # value. A zero-width zone is used as placeholder.
  # NOTE(review): confirm that ASICS() treats a (0, 0) zone as "exclude nothing".
  exclusionZonesBorders <- matrix(c(0, 0), ncol=2)
}

## Maximal allowed shift
shift <- as.numeric(argLs[["shift"]])

## Graphical zone(s)
# List with one c(left, right) per zone; empty when no zone was requested
graphicalZonesBorders <- collectZoneBorders(argLs, "zone_graphical_left")

# Outputs
logOut <- argLs[["logOut"]]
proportionEstimation <- argLs[["proportionEstimation"]]
graphOut <- argLs[["graphOut"]]

sink(logOut)
cat("\tPACKAGE INFO\n")
pkgs <- c("batch", "ASICS")
for(pkg in pkgs) {
    suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)))
    cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="")
}
cat("\n")


## Checking arguments
##-------------------
error.stock <- "\n"
if (length(shift) != 1 || is.na(shift) || shift < 0)
  error.stock <- c(error.stock, "\nThe maximal allowed shift must be a single non-negative number\n")
if (any(is.na(exclusionZonesBorders)))
  error.stock <- c(error.stock, "\nExclusion zone borders must be numeric\n")
if (any(is.na(unlist(graphicalZonesBorders))))
  error.stock <- c(error.stock, "\nGraphical zone borders must be numeric\n")
if(length(error.stock) > 1)
  stop(error.stock)


## Computation
##------------
annotation.Asics <- ASICS(directory, exclusion.areas=exclusionZonesBorders,
                          max.shift=shift, which.spectra="last", library.metabolites=NULL,
                          threshold.noise=0.02, seed=1234, nb.iter.signif=400)


## Saving
##-------
# Identified metabolites
metabolites.estimation <- present_metabolites(annotation.Asics)
colnames(metabolites.estimation) <- c("Metabolite",colnames(metabolites.estimation)[-1])
write.table(metabolites.estimation,file=proportionEstimation,row.names=FALSE,quote=FALSE,sep="\t")


## Graphical display
##------------------
# Raw and annotated spectra comparison
pdf(graphOut,onefile=TRUE)

## Graphical output: overlay of raw and estimated spectra
# Both spectra share the same ppm grid; sort them once by decreasing ppm so
# that index 1 is the left-most (highest ppm) point of the plots.
ppmGrid <- round(ppm_grid(annotation.Asics),3)
byDecreasingPpm <- order(ppmGrid, decreasing=TRUE)
ppm <- ppmGrid[byDecreasingPpm]
realMix <- original_mixture(annotation.Asics)[byDecreasingPpm]
estimatedMix <- reconstituted_mixture(annotation.Asics)[byDecreasingPpm]

## Whole spectra
ticks <- tickIndices(length(ppm), 10)
plot(seq_along(ppm), realMix, type='l', xlab="", main="", xaxt="n", ylab="")
axis(1, at=ticks, labels=ppm[ticks])
lines(estimatedMix, col="red")
legend("topleft",legend=c("Real Mixture","Estimated Composition"),lty=c(1,1),col=c("black","red"))

## Zoomed spectral window depending on user-selected zone(s)
for (g in seq_along(graphicalZonesBorders))
{
  zone <- graphicalZonesBorders[[g]]
  cat("Graphical zone ", g, ": ", zone[1], " - ", zone[2], " ppm\n", sep="")

  # The right border is floored at 0.5 ppm, as in the previous version.
  # NOTE(review): confirm the reason for this floor.
  leftIdx <- nearestIndex(ppm, zone[1])
  rightIdx <- nearestIndex(ppm, max(zone[2], 0.5))
  # Tolerate borders entered in the wrong order
  zoneIdx <- seq(min(leftIdx, rightIdx), max(leftIdx, rightIdx))

  plot(seq_along(zoneIdx), realMix[zoneIdx], type='l', xlab="", ylab="Intensity", main="", xaxt="n")
  lines(estimatedMix[zoneIdx], col="red")

  # Tick labels are read at the plotted positions, so they always match the
  # displayed range.
  ticks <- tickIndices(length(zoneIdx), 4)
  axis(1, at=ticks, labels=ppm[zoneIdx[ticks]])
}

invisible(dev.off())


## Ending
##---------------------
cat("\nEnd of 'NMR annotation' Galaxy module call: ", as.character(Sys.time()), sep="")
options(stringsAsFactors=strAsFacL)
sink()
<!-- nmr_annotation/asics_xml.xml : Galaxy tool definition for the ASICS-based NMR annotation wrapper -->
<tool id="NmrAnnotation" name="NMR_Annotation" version="1.0.0">

    <description> Annotation of complex mixture NMR spectra and metabolite proportion estimation </description>

    <command>
        which Rscript ;
        ## Wrapper
        Rscript '$__tool_directory__/asics_wrapper.R' standards '$__tool_directory__/Library.Rdata'

        ## Bruker files
        zipfile '$zip_file'

        ## Exclusion zone
        zone_exclusion_choices.choice ${zone_exclusion_choices.choice}
        #if str($zone_exclusion_choices.choice) == 'yes':
            #for $i in $zone_exclusion_choices.conditions:
                zone_exclusion_left ${i.zone_exclusion_left}
                zone_exclusion_right ${i.zone_exclusion_right}
            #end for
        #end if

        ## Maximal shift (spectrum alignment)
        shift $shift

        ## Graphical zone
        zone_graphical_choices.choice ${zone_graphical_choices.choice}
        #if str($zone_graphical_choices.choice) == 'yes':
            #for $i in $zone_graphical_choices.conditions:
                zone_graphical_left ${i.zone_graphical_left}
                zone_graphical_right ${i.zone_graphical_right}
            #end for
        #end if

        ## Outputs
        logOut '$logOut'
        proportionEstimation '$proportionEstimation'
        graphOut '$graphOut'
    </command>

    <inputs>
        <param name="zip_file" type="data" format="no_unzip.zip" label="Zip file" />

        <conditional name="zone_exclusion_choices">
            <param name="choice" type="select" label="Exclusion zone(s)" help="Choose if you want to exclude particular zone(s)" >
                <option value="yes" > yes </option>
                <option value="no" selected="true"> no </option>
            </param>
            <when value="yes">
                <repeat name="conditions" title="exclusion zones">
                    <param name="zone_exclusion_left" label="Left exclusion zone border" type="float" value="10.0" />
                    <param name="zone_exclusion_right" label="Right exclusion zone border" type="float" value="10.0" />
                </repeat>
            </when>
            <when value="no" />
        </conditional>

        <param name="shift" type="float" value="0.01" label="Shift" help="Maximal allowed shift for spectra alignment. Default value is 0.01 ppm" />

        <conditional name="zone_graphical_choices">
            <param name="choice" type="select" label="Graphical zone(s)" help="Choose if you want to display particular zone(s)" >
                <option value="yes" > yes </option>
                <option value="no" selected="true"> no </option>
            </param>
            <when value="yes">
                <repeat name="conditions" title="graphical zones">
                    <param name="zone_graphical_left" label="Left graphical zone border" type="float" value="10.0" />
                    <param name="zone_graphical_right" label="Right graphical zone border" type="float" value="10.0" />
                </repeat>
            </when>
            <when value="no" />
        </conditional>
    </inputs>

    <outputs>
        <data format="txt" name="logOut" label="${tool.name}_log" />
        <data format="tabular" name="proportionEstimation" label="${tool.name}_proportionEstimation" />
        <data format="pdf" name="graphOut" label="${tool.name}_graph" />
    </outputs>

    <stdio>
        <exit_code range="1:" level="fatal" />
    </stdio>
<help>

.. class:: infomark

**Authors** Marie Tremblay-Franco (marie.tremblay-franco@inra.fr), Patrick Tardivel (patrick.tardivel@inra.fr), Rémi Servien (remi.servien@inra.fr) and Gaelle Lefort (gaelle.lefort@inra.fr)

.. class:: infomark

**Please cite**

Tardivel P., Servien R. and Concordet D. Non asymptotic active set properties of lasso-type estimators in small-dimension (submitted)

Tardivel P., Servien R., Canlet C., Tremblay-Franco M., Debrauwer L. and Concordet D. ASICS: an automatic method for identification and quantification of metabolites in NMR 1D 1H spectra (in preparation)

---------------------------------------------------

==============
NMR Annotation
==============

-----------
Description
-----------

ASICS, based on a strong statistical theory, handles automatically the metabolite identification and quantification

-----------------
Workflow position
-----------------

**Upstream tools**

========================= ================= ======= =========
Name                      output file       format  parameter
========================= ================= ======= =========
NA                        NA                NA      NA
========================= ================= ======= =========


**Downstream tools**

========================= ================= ======= =========
Name                      output file       format  parameter
========================= ================= ======= =========
NA                        NA                NA      NA
========================= ================= ======= =========



-----------
Input files
-----------

+---------------------------+------------+
| Parameter : num + label   |   Format   |
+===========================+============+
| 1 : Choose your inputs    |   zip      |
+---------------------------+------------+

**Choose your inputs**

Inputs are provided as a single zip file:

    | Zip file : You can put a zip file containing one condition (sample): myinputs.zip.

.. image:: ./static/images/Mth_Architecture_Repertoire_Bruker.png
        :width: 800

----------
Parameters
----------

Exclusion zone(s)
    | Spectral regions to exclude: water, solvent or contaminant resonances
    | If YES: parameters **Left exclusion zone border** and **Right exclusion zone border** are visible,
    | If NO: no zone to exclude
    | Default value is NO
    |

Left exclusion zone border
    | Upper boundary of exclusion zone
    |

Right exclusion zone border
    | Lower boundary of exclusion zone
    |
    | *Notes:*
    | - these parameters can be used several times using the "Add new exclusion zones" button
    |

Shift
    | Maximum variation of the chemical shift (due to experimental conditions) of a peak of a fixed metabolite allowed in the warping function
    |

Graphical zone(s)
    | Spectral regions to display
    | If YES: parameters **Left graphical zone border** and **Right graphical zone border** are visible,
    | If NO: no zone to display
    | Default value is NO
    |


------------
Output files
------------

proportionEstimation.tsv
    | tabular output
    | Array with p rows (corresponding to the identified metabolites) and 2 columns: the metabolite name and its estimated relative concentration
    |

spectra.pdf
    | pdf output
    | Graphical chart of real and estimated spectrum (whole spectral width and zooms depending on graphical zone(s))
    |


---------------------------------------------------

---------------
Example
---------------


See the W4M00005_mmusculus in the Shared Data/Published Histories menu

.. image:: ./static/images/NmrAnnotation_ASICS.png
        :width: 100


    </help>

</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nmr_annotation/test-data/MTBLS1_177_NMR_Annotation_proportionEstimation.tabular Tue Jan 30 05:37:10 2018 -0500 @@ -0,0 +1,40 @@ +Metabolite Relative_Concentration +Creatinine 0.0569174516124917 +Citrate 0.0285947900676533 +AceticAcid 0.00931139835139612 +L-Glycine 0.00700618802859799 +ThreonicAcid 0.00312228402191229 +Lactose 0.00274471352269867 +Malonate 0.0026239869463794 +Indoxylsulfate 0.00256983041226207 +L-Arabitol 0.00247809838365845 +L-GlutamicAcid 0.00219750986486448 +Threitol 0.00185008716865122 +L-Alanine 0.00176514469439213 +HippuricAcid 0.00173759535227614 +Betaine 0.00163016358478906 +MalicAcid 0.00154536802871288 +1,3-Diaminopropane 0.00150129592201832 +2-oxoglutarate 0.00142533208121273 +Dimethylamine 0.00131575320617744 +L-Aspartate 0.00127266582522283 +AscorbicAcid 0.00100873973025415 +L-Asparagine 0.00099187237086438 +GuanidinoaceticAcid 0.000925290560878349 +Ethanolamine 0.00089279079362688 +Glycerol 0.00088244833185825 +5,6-Dihydro-5-Methyluracil 0.000849548732878672 +Creatine 0.00079525125560122 +MethylmalonicAcid 0.000792506060661937 +Xylitol 0.000787992683508674 +Taurine 0.000787744798563012 +Succinate 0.000527784171596945 +4-HydroxyphenylAceticAcid 0.000522123163032416 +Methylguanidine 0.000486125204031847 +Myo-Inositol 0.0004425385208034 +beta-HydroxyisovalericAcid 0.000419265217514307 +Propionate 0.000392879831159886 +TMAO 0.000391632855219129 +L-Valine 0.000329117139315598 +Isobutyrate 0.000312582908447652 +CholineChloride 0.000306877971629503