Mercurial > repos > yguitton > metams_rungc
changeset 5:b8d4129dd2a6 draft
planemo upload for repository https://github.com/workflow4metabolomics/metaMS commit c7a518686137f6d62b7415715152e8d5a9953ed7
author | yguitton |
---|---|
date | Fri, 06 Sep 2019 06:09:10 -0400 |
parents | c10824185547 |
children | 286ebb9f6e84 |
files | lib_metams.r metaMS_runGC.r test-data/W4M0004_database_small.msp test-data/dataMatrix.tsv test-data/peakspectra.msp test-data/peaktable.tsv test-data/runGC.RData test-data/variableMetadata.tsv |
diffstat | 8 files changed, 111 insertions(+), 83 deletions(-) [+] |
line wrap: on
line diff
--- a/lib_metams.r Wed Jul 03 05:14:32 2019 -0400 +++ b/lib_metams.r Fri Sep 06 06:09:10 2019 -0400 @@ -162,13 +162,13 @@ ##ADDITIONS FROM Y. Guitton getBPC <- function(file,rtcor=NULL, ...) { object <- xcmsRaw(file) - sel <- profRange(object, ...) - cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) + sel <- profRange(object, ...) + cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) } getBPC2s <- function (files, xset = NULL, pdfname="BPCs.pdf", rt = c("raw","corrected"), scanrange=NULL) { require(xcms) - + #create sampleMetadata, get sampleMetadata and class if(!is.null(xset)) { #When files come from XCMS3 directly before metaMS @@ -184,10 +184,10 @@ } N <- dim(sampleMetadata)[1] - TIC <- vector("list",N) + BPC <- vector("list",N) for (j in 1:N) { - TIC[[j]] <- getBPC(files[j]) + BPC[[j]] <- getBPC(files[j]) #good for raw # seems strange for corrected #errors if scanrange used in xcmsSetgeneration @@ -196,7 +196,7 @@ }else{ rtcor <- NULL } - TIC[[j]] <- getBPC(files[j],rtcor=rtcor) + BPC[[j]] <- getBPC(files[j],rtcor=rtcor) } pdf(pdfname,w=16,h=10) @@ -204,8 +204,10 @@ lty = 1:N pch = 1:N #search for max x and max y in BPCs - xlim = range(sapply(TIC, function(x) range(x[,1]))) - ylim = range(sapply(TIC, function(x) range(x[,2]))) + + xlim = range(sapply(BPC, function(x) range(x[,1]))) + ylim = range(sapply(BPC, function(x) range(x[,2]))) + ylim = c(-ylim[2], ylim[2]) ##plot start @@ -216,15 +218,15 @@ plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") colvect<-NULL for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + bpc <- BPC[[classnames[[k]][j]]] + # points(bpc[,1]/60, bpc[,2], col = cols[i], pch = pch[i], type="l") + points(bpc[,1]/60, bpc[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") colvect<-append(colvect,cols[classnames[[k]][j]]) } for (j in 1:length(classnames[[l]])) { # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + bpc <- BPC[[classnames[[l]][j]]] + points(bpc[,1]/60, -bpc[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") colvect<-append(colvect,cols[classnames[[l]][j]]) } legend("topright",paste(gsub("(^.+)\\..*$","\\1",basename(files[c(classnames[[k]],classnames[[l]])]))), col = colvect, lty = lty, pch = pch) @@ -239,15 +241,15 @@ plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + bpc <- BPC[[classnames[[k]][j]]] + # points(bpc[,1]/60, bpc[,2], col = cols[i], pch = pch[i], type="l") + points(bpc[,1]/60, bpc[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") colvect<-append(colvect,cols[classnames[[k]][j]]) } for (j in 1:length(classnames[[l]])) { # i=class2names[j] - tic <- TIC[[classnames[[l]][j]]] - points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") + bpc <- BPC[[classnames[[l]][j]]] + points(bpc[,1]/60, -bpc[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") colvect<-append(colvect,cols[classnames[[l]][j]]) } legend("topright",paste(gsub("(^.+)\\..*$","\\1",basename(files[c(classnames[[k]],classnames[[l]])]))), col = colvect, lty = lty, pch = pch) @@ -255,14 +257,16 @@ if (length(class)==1){ k=1 - ylim = range(sapply(TIC, function(x) range(x[,2]))) + + ylim = range(sapply(BPC, function(x) range(x[,2]))) + colvect<-NULL plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") for (j in 1:length(classnames[[k]])) { - tic <- TIC[[classnames[[k]][j]]] - # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") - points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") + bpc <- BPC[[classnames[[k]][j]]] + # points(bpc[,1]/60, bpc[,2], col = cols[i], pch = pch[i], type="l") + points(bpc[,1]/60, bpc[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") colvect<-append(colvect,cols[classnames[[k]][j]]) } legend("topright",paste(gsub("(^.+)\\..*$","\\1",basename(files[c(classnames[[k]])]))), col = colvect, lty = lty, pch = pch) @@ -297,7 +301,6 @@ TIC <- vector("list",N) for (i in 1:N) { - cat(files[i],"\n") if (!is.null(xcmsSet) && rt == "corrected") rtcor <- xcmsSet@rt$corrected[[i]] else @@ -318,7 +321,7 @@ if (length(class)>2){ for (k in 1:(length(class)-1)){ for (l in (k+1):length(class)){ - print(paste(class[k],"vs",class[l],sep=" ")) + cat(paste(class[k],"vs",class[l],"\n",sep=" ")) plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") colvect<-NULL for (j in 1:length(classnames[[k]])) { @@ -500,4 +503,4 @@ } graphics.off() }#end for unkn[l] -}#end function \ No newline at end of file +}#end function
--- a/metaMS_runGC.r Wed Jul 03 05:14:32 2019 -0400 +++ b/metaMS_runGC.r Fri Sep 06 06:09:10 2019 -0400 @@ -194,7 +194,7 @@ #runGC accept either a list of files a zip folder or an xset object from xcms.xcmsSet tool #From xset is an .RData file necessary to use the xcmsSet object generated by xcms.xcmsSet given by previous tools if (!is.null(args$singlefile_galaxyPath)){ - cat("Loading datas from XCMS file(s)...\n") + cat("Loading datas from XCMS files...\n") load(args$singlefile_galaxyPath) #Transform XCMS object if needed @@ -207,6 +207,12 @@ stop(error_message) } } + #Verify that there are more than 1 file (can't run metaMS on only 1 file) + if(length(rownames(xdata@phenoData)) < 2){ + error_message="You need more than 1 file to be able to run metaMS" + print(error_message) + stop(error_message) + } #xset from xcms.xcmsSet is not well formatted for metaMS this function do the formatting if (class(xset)=="xcmsSet"){ @@ -253,7 +259,7 @@ #default settings for GC from Wehrens et al cat("Process runGC with metaMS package...\n\n") - print(str(TSQXLS.GC)) + print(str(TSQXLS.GC)) resGC<-runGC(xset=xsetCAM,settings=TSQXLS.GC, rtrange=rtrange, DB= DBgc, removeArtefacts = TRUE, findUnknowns = TRUE, returnXset = TRUE, RIstandards = RIarg, nSlaves = nSlaves) } else { @@ -328,4 +334,12 @@ objects2save <- c("resGC", "xset", "singlefile", "zipfile", "DBgc") save(list = objects2save[objects2save %in% ls()], file = "runGC.RData") -cat("\nEnd of '", modNamC, "' Galaxy module call: ", as.character(Sys.time()), "\n", sep = "") \ No newline at end of file +cat("\nEnd of '", modNamC, "' Galaxy module call: ", as.character(Sys.time()), "\n", sep = "") + +#WARNING if user has CDF files (not yet good for plotting) +files <- paste("./",names(singlefile),sep="") +if(MSnbase:::isCdfFile(files)){ + warning_message <- "You have CDF files, for the moment you can't obtain plot after runGC! A new update will follow with the good correction" + warning(warning_message) + cat(paste("\n","/!\\Warning/!\\",warning_message,sep="\n")) +} \ No newline at end of file
--- a/test-data/W4M0004_database_small.msp Wed Jul 03 05:14:32 2019 -0400 +++ b/test-data/W4M0004_database_small.msp Fri Sep 06 06:09:10 2019 -0400 @@ -1,8 +1,11 @@ Name: Citric acid, 4TMS -DB.idx: 2 +DB.idx: 1 +RI: 1803.92 +Formula: C18H40O7Si4 +monoMW: 480.848 rt: 26.388 +std.rt: 0.0033 Class: Standard -std.rt: 0.0033 Num Peaks: 306 51 182440; 53 944503; 54 369474; 55 3340984; 56 631621; 57 1917866; 58 4022998; 59 7124250; 60 1136187; 61 3360226; @@ -68,10 +71,13 @@ 449 29707; Name: D-Mannitol, 6TMS -DB.idx: 3 +DB.idx: 2 +RI: 1916 +Formula: C24H62O6Si6 +monoMW: 620 rt: 28.581 +std.rt: 0.003 Class: Standard -std.rt: 0.003 Num Peaks: 262 53 489021; 54 603836; 55 1296110; 56 240694; 57 468516; 58 1361419; 59 5385271; 60 600415; 61 685447; 62 50899; @@ -128,10 +134,13 @@ 435 116595; 437 17236; Name: Ribitol, 5TMS -DB.idx: 4 +DB.idx: 3 +RI: 1712.74 +Formula: C20H52O5Si5 +monoMW: 512.052 rt: 24.487 +std.rt: 0.0029 Class: Standard -std.rt: 0.0029 Num Peaks: 236 53 484493; 54 695945; 55 1354523; 56 372585; 57 610381; 58 1975838; 59 7252890; 60 668667; 61 963183; 62 72158; @@ -183,8 +192,9 @@ 427 4882; Name: Glycine, 3TMS -DB.idx: 5 +DB.idx: 4 RI: 1302.682 +Formula: C11H29NO2Si3 monoMW: 291.610 rt: 13.965 std.rt: 0.0033 @@ -208,10 +218,10 @@ 278 137402; 279 34431; 367 3442; Name: Pyroglutamic acid, 2TMS -DB.idx: 6 +DB.idx: 5 RI: 1650.417 Formula: C11H23NO3Si2 -MW: 273.477 +monoMW: 273.477 rt: 19.513 std.rt: 0.0035 Class: Standard @@ -250,10 +260,10 @@ 346 77810; 348 24720; 420 13841; Name: Alanine, 3TMS -DB.idx: 7 +DB.idx: 6 RI: 1360.504 Formula: C12H31NO2Si3 -MW: 305.637 +monoMW: 305.637 rt: 15.323 std.rt: 0.0026 Num Peaks: 124 @@ -284,10 +294,10 @@ 292 225705; 293 58740; 305 25278; 306 8768; Name: Aspartic acid, 2TMS -DB.idx: 8 +DB.idx: 7 RI: 1422.39 Formula: C10H23NO4Si2 -MW: 277.465 +monoMW: 277.465 rt: 17.071 std.rt: 0.0042 Num Peaks: 116 @@ -317,10 +327,10 @@ 442 2912; Name: Tryptamine, 2TMS -DB.idx: 9 +DB.idx: 8 RI: 2224.531 Formula: C16H28N2Si2 -MW: 304.578 +monoMW: 304.578 rt: 19.044 std.rt: 0.0032 Num Peaks: 125
--- a/test-data/dataMatrix.tsv Wed Jul 03 05:14:32 2019 -0400 +++ b/test-data/dataMatrix.tsv Fri Sep 06 06:09:10 2019 -0400 @@ -1,10 +1,10 @@ Name alg3 alg8 alg7 alg9 alg11 alg2 Glycine, 3TMS 8986693 18739515 23638072 64542302 73997431 3271105 -Pyroglutamic acid, 2TMS 52421941 117387451 201537792 172306144 173875991 18034050 +Pyroglutamic acid, 2TMS 52421941 117387451 201537792 172306142 173875991 17771445 Alanine, 3TMS 16302374 40418507 56198912 47836465 75099028 5873408 Aspartic acid, 2TMS 5491883 30361752 53297090 31703522 43848521 0 Tryptamine, 2TMS 24418912 21999992 12482634 19565268 29742266 12344352 -Unknown 1 2608558 7958675 10512589 0 0 716439 +Unknown 1 2608558 7958675 10512729 0 0 716439 Unknown 2 992454 1414530 0 0 0 350707 Unknown 3 0 47472144 65646101 48115807 0 0 Unknown 4 0 17508248 9099661 16725736 22787828 0
--- a/test-data/peakspectra.msp Wed Jul 03 05:14:32 2019 -0400 +++ b/test-data/peakspectra.msp Fri Sep 06 06:09:10 2019 -0400 @@ -1,10 +1,11 @@ Name: Glycine, 3TMS -DB.idx: 5 +DB.idx: 4 RI: 1302.682 +Formula: C11H29NO2Si3 monoMW: 291.61 std.rt: 13.965 Class: Standard -DB.idx: 5 +DB.idx: 4 Num Peaks: 78 57 316293; 58 608407; 59 2360607; 60 298845; 61 196310; 70 169295; 71 161384; 72 648791; 73 11721138; 74 1113027; @@ -24,13 +25,13 @@ 278 137402; 279 34431; 367 3442; Name: Pyroglutamic acid, 2TMS -DB.idx: 6 +DB.idx: 5 RI: 1650.417 Formula: C11H23NO3Si2 -MW: 273.477 +monoMW: 273.477 std.rt: 19.513 Class: Standard -DB.idx: 6 +DB.idx: 5 Num Peaks: 158 51 338626; 52 575985; 53 464956; 54 649153; 55 3336412; 56 1197195; 57 2169449; 58 6685191; 59 9081136; 60 1387309; @@ -66,13 +67,13 @@ 346 77810; 348 24720; 420 13841; Name: Alanine, 3TMS -DB.idx: 7 +DB.idx: 6 RI: 1360.504 Formula: C12H31NO2Si3 -MW: 305.637 +monoMW: 305.637 std.rt: 15.323 Class: Manual -DB.idx: 7 +DB.idx: 6 Num Peaks: 124 54 89758; 55 274561; 56 190979; 57 472743; 58 1227701; 59 8857202; 60 769050; 61 572383; 62 41693; 66 257757; @@ -101,13 +102,13 @@ 292 225705; 293 58740; 305 25278; 306 8768; Name: Aspartic acid, 2TMS -DB.idx: 8 +DB.idx: 7 RI: 1422.39 Formula: C10H23NO4Si2 -MW: 277.465 +monoMW: 277.465 std.rt: 17.071 Class: Manual -DB.idx: 8 +DB.idx: 7 Num Peaks: 116 53 110556; 54 70748; 55 491989; 56 176719; 57 429061; 58 867565; 59 1844082; 60 680229; 61 1783651; 62 153287; @@ -135,13 +136,13 @@ 442 2912; Name: Tryptamine, 2TMS -DB.idx: 9 +DB.idx: 8 RI: 2224.531 Formula: C16H28N2Si2 -MW: 304.578 +monoMW: 304.578 std.rt: 19.044 Class: Manual -DB.idx: 9 +DB.idx: 8 Num Peaks: 125 53 103908; 54 92348; 55 359811; 56 220319; 57 234375; 58 577937; 59 3625396; 60 353747; 61 203780; 65 56054;
--- a/test-data/peaktable.tsv Wed Jul 03 05:14:32 2019 -0400 +++ b/test-data/peaktable.tsv Fri Sep 06 06:09:10 2019 -0400 @@ -1,12 +1,12 @@ -"Name" "DB.idx" "RI" "monoMW" "Class" "Formula" "MW" "std.rt" "rt.sd" "rt" "alg3" "alg8" "alg7" "alg9" "alg11" "alg2" -"Glycine, 3TMS" 5 1302.682 291.61 "Standard" NA NA 13.965 0.0267 13.973 8986693 18739515 23638072 64542302 73997431 3271105 -"Pyroglutamic acid, 2TMS" 6 1650.417 NA "Standard" "C11H23NO3Si2" 273.477 19.513 0.0247 19.512 52421941 117387451 201537792 172306144 173875991 18034050 -"Alanine, 3TMS" 7 1360.504 NA "Manual" "C12H31NO2Si3" 305.637 15.323 0.028 15.332 16302374 40418507 56198912 47836465 75099028 5873408 -"Aspartic acid, 2TMS" 8 1422.39 NA "Manual" "C10H23NO4Si2" 277.465 17.071 0.0359 17.09 5491883 30361752 53297090 31703522 43848521 0 -"Tryptamine, 2TMS" 9 2224.531 NA "Manual" "C16H28N2Si2" 304.578 19.044 0.036 19.068 24418912 21999992 12482634 19565268 29742266 12344352 -"Unknown 1" NA NA NA "Unknown" NA NA NA 0.0049 10.488 2608558 7958675 10512589 0 0 716439 -"Unknown 2" NA NA NA "Unknown" NA NA NA 0.005 11.428 992454 1414530 0 0 0 350707 -"Unknown 3" NA NA NA "Unknown" NA NA NA 0.0037 13.262 0 47472144 65646101 48115807 0 0 -"Unknown 4" NA NA NA "Unknown" NA NA NA 0.0032 17.879 0 17508248 9099661 16725736 22787828 0 -"Unknown 5" NA NA NA "Unknown" NA NA NA 0.0046 17.181 0 15283058 7754880 13107264 19795467 0 -"Unknown 6" NA NA NA "Unknown" NA NA NA 0.004 13.327 0 5270387 6059689 6659142 0 0 +"Name" "DB.idx" "RI" "Formula" "monoMW" "Class" "std.rt" "rt.sd" "rt" "alg3" "alg8" "alg7" "alg9" "alg11" "alg2" +"Glycine, 3TMS" 4 1302.682 "C11H29NO2Si3" 291.61 "Standard" 13.965 0.0267 13.973 8986693 18739515 23638072 64542302 73997431 3271105 +"Pyroglutamic acid, 2TMS" 5 1650.417 "C11H23NO3Si2" 273.477 "Standard" 19.513 0.0247 19.512 52421941 117387451 201537792 172306142 173875991 17771445 +"Alanine, 3TMS" 6 1360.504 "C12H31NO2Si3" 305.637 "Manual" 15.323 0.028 15.332 16302374 40418507 56198912 47836465 75099028 5873408 +"Aspartic acid, 2TMS" 7 1422.39 "C10H23NO4Si2" 277.465 "Manual" 17.071 0.0359 17.09 5491883 30361752 53297090 31703522 43848521 0 +"Tryptamine, 2TMS" 8 2224.531 "C16H28N2Si2" 304.578 "Manual" 19.044 0.036 19.068 24418912 21999992 12482634 19565268 29742266 12344352 +"Unknown 1" NA NA NA NA "Unknown" NA 0.0049 10.488 2608558 7958675 10512729 0 0 716439 +"Unknown 2" NA NA NA NA "Unknown" NA 0.005 11.428 992454 1414530 0 0 0 350707 +"Unknown 3" NA NA NA NA "Unknown" NA 0.0037 13.262 0 47472144 65646101 48115807 0 0 +"Unknown 4" NA NA NA NA "Unknown" NA 0.0032 17.879 0 17508248 9099661 16725736 22787828 0 +"Unknown 5" NA NA NA NA "Unknown" NA 0.0046 17.181 0 15283058 7754880 13107264 19795467 0 +"Unknown 6" NA NA NA NA "Unknown" NA 0.004 13.327 0 5270387 6059689 6659142 0 0
--- a/test-data/variableMetadata.tsv Wed Jul 03 05:14:32 2019 -0400 +++ b/test-data/variableMetadata.tsv Fri Sep 06 06:09:10 2019 -0400 @@ -1,12 +1,12 @@ -Name DB.idx RI monoMW Class Formula MW std.rt rt.sd rt -Glycine, 3TMS 5 1302.682 291.61 Standard NA NA 13.965 0.0267 13.973 -Pyroglutamic acid, 2TMS 6 1650.417 NA Standard C11H23NO3Si2 273.477 19.513 0.0247 19.512 -Alanine, 3TMS 7 1360.504 NA Manual C12H31NO2Si3 305.637 15.323 0.028 15.332 -Aspartic acid, 2TMS 8 1422.39 NA Manual C10H23NO4Si2 277.465 17.071 0.0359 17.09 -Tryptamine, 2TMS 9 2224.531 NA Manual C16H28N2Si2 304.578 19.044 0.036 19.068 -Unknown 1 NA NA NA Unknown NA NA NA 0.0049 10.488 -Unknown 2 NA NA NA Unknown NA NA NA 0.005 11.428 -Unknown 3 NA NA NA Unknown NA NA NA 0.0037 13.262 -Unknown 4 NA NA NA Unknown NA NA NA 0.0032 17.879 -Unknown 5 NA NA NA Unknown NA NA NA 0.0046 17.181 -Unknown 6 NA NA NA Unknown NA NA NA 0.004 13.327 +Name DB.idx RI Formula monoMW Class std.rt rt.sd rt +Glycine, 3TMS 4 1302.682 C11H29NO2Si3 291.61 Standard 13.965 0.0267 13.973 +Pyroglutamic acid, 2TMS 5 1650.417 C11H23NO3Si2 273.477 Standard 19.513 0.0247 19.512 +Alanine, 3TMS 6 1360.504 C12H31NO2Si3 305.637 Manual 15.323 0.028 15.332 +Aspartic acid, 2TMS 7 1422.39 C10H23NO4Si2 277.465 Manual 17.071 0.0359 17.09 +Tryptamine, 2TMS 8 2224.531 C16H28N2Si2 304.578 Manual 19.044 0.036 19.068 +Unknown 1 NA NA NA NA Unknown NA 0.0049 10.488 +Unknown 2 NA NA NA NA Unknown NA 0.005 11.428 +Unknown 3 NA NA NA NA Unknown NA 0.0037 13.262 +Unknown 4 NA NA NA NA Unknown NA 0.0032 17.879 +Unknown 5 NA NA NA NA Unknown NA 0.0046 17.181 +Unknown 6 NA NA NA NA Unknown NA 0.004 13.327