Repository 'metams_rungc'
hg clone https://toolshed.g2.bx.psu.edu/repos/yguitton/metams_rungc

Changeset 3:c75532b75ba1 (2017-06-08)
Previous changeset 2:8e1b99a7d733 (2017-05-24) Next changeset 4:c10824185547 (2019-07-03)
Commit message:
Uploaded version 2.1.1
modified:
lib_metams.r
metams.r
metams_runGC.xml
b
diff -r 8e1b99a7d733 -r c75532b75ba1 lib_metams.r
--- a/lib_metams.r Wed May 24 07:39:18 2017 -0400
+++ b/lib_metams.r Thu Jun 08 11:53:35 2017 -0400
[
b'@@ -1,6 +1,7 @@\n-# lib_metams.r version 0.99.6\n+# lib_metams.r version 2.0.0\n # R function for metaMS runGC under W4M\n # author Yann GUITTON CNRS IRISA/LINA Idealg project 2014-2015\n+# author Yann GUITTON Oniris Laberca 2015-2017\n \n \n ##ADDITIONS FROM Y. Guitton\n@@ -245,249 +246,335 @@\n     ##Annotation table each value is a pcgrp associated to the unknown \n     ##NOTE pcgrp index are different between xcmsSet and resGC due to filtering steps in metaMS\n     ##R. Wehrens give me some clues on that and we found a correction\n- \n-    mat<-matrix(ncol=length(resGC$xset), nrow=dim(resGC$PeakTable)[1])\n-     \n-    for (j in 1: length(resGC$xset)){\n-        test<-resGC$annotation[[j]]\n-        print(paste("j=",j))\n-        for (i in 1:dim(test)[1]){\n-            if (as.numeric(row.names(test)[i])>dim(mat)[1]){\n-                next\n-            } else {\n-                mat[as.numeric(row.names(test)[i]),j]<-test[i,1]\n-            }\n+    #if unkn="none"\n+\t\n+\tif(unkn=="none") {\n+\t   pdf("Unknown_Empty.pdf")\n+\t   plot.new()\n+\t   text(x=0.5,y=1,pos=1, labels="No EIC ploting required")\n+\t   dev.off()\n+\t}else {\n+\n+\t\tmat<-matrix(ncol=length(resGC$xset), nrow=dim(resGC$PeakTable)[1])\n+\t\t \n+\t\tfor (j in 1: length(resGC$xset)){\n+\t\t\ttest<-resGC$annotation[[j]]\n+\t\t\tprint(paste("j=",j))\n+\t\t\tfor (i in 1:dim(test)[1]){\n+\t\t\t\tif (as.numeric(row.names(test)[i])>dim(mat)[1]){\n+\t\t\t\t\tnext\n+\t\t\t\t} else {\n+\t\t\t\t\tmat[as.numeric(row.names(test)[i]),j]<-test[i,1]\n+\t\t\t\t}\n+\t\t\t}\n+\t\t}\n+\t\tcolnames(mat)<-colnames(resGC$PeakTable[,c((which(colnames(resGC$PeakTable)=="rt"|colnames(resGC$PeakTable)=="RI")[length(which(colnames(resGC$PeakTable)=="rt"|colnames(resGC$PeakTable)=="RI"))]+1):dim(resGC$PeakTable)[2])])\n+\t\t\n+\t\t#debug\n+\n+\t\t# print(dim(mat))\n+\t\t# print(mat[1:3,]) \n+\t\t# write.table(mat, file="myannotationtable.tsv", sep="\\t", row.names=FALSE)\n+\t\t#correction of annotation matrix due to 
pcgrp removal by quality check in runGCresult\n+\t\t#matrix of correspondance between an@pspectra and filtered pspectra from runGC\n+\n+\t\tallPCGRPs <-\n+\t\t\tlapply(1:length(resGC$xset),\n+\t\t\t\tfunction(i) {\n+\t\t\t\t\tan <- resGC$xset[[i]]\n+\t\t\t\t\thuhn <- an@pspectra[which(sapply(an@pspectra, length) >=\n+\t\t\t\t\tmetaSetting(resGC$settings,\n+\t\t\t\t\t"DBconstruction.minfeat"))]\n+\t\t\t\t\tmatCORR<-cbind(1:length(huhn), match(huhn, an@pspectra))\n+\t\t\t\t})\n+\n+\t\tif (unkn[1]==""){    \n+\t\t#plot EIC and spectra for all unknown for comparative purpose\n+\t   \n+\n+\t\t\tpar (mar=c(5, 4, 4, 2) + 0.1)\n+\t\t\tfor (l in 1:dim(resGC$PeakTable)[1]){ #l=2\n+\t\t\t\t#recordPlot\n+\t\t\t\tperpage=3 #if change change layout also!\n+\t\t\t\tnum.plots <- ceiling(dim(mat)[2]/perpage) #three pcgroup per page\n+\t\t\t\tmy.plots <- vector(num.plots, mode=\'list\')\n+\t\t\t\tdev.new(width=21/2.54, height=29.7/2.54, file=paste("Unknown_",l,".pdf", sep="")) #A4 pdf\n+\t\t\t\t# par(mfrow=c(perpage,2))\n+\t\t\t\tlayout(matrix(c(1,1,2,3,4,4,5,6,7,7,8,9), 6, 2, byrow = TRUE), widths=rep(c(1,1),perpage), heights=rep(c(1,5),perpage))\n+\t\t\t\t# layout.show(6)\n+\t\t\t\toma.saved <- par("oma")\n+\t\t\t\tpar(oma = rep.int(0, 4))\n+\t\t\t\tpar(oma = oma.saved)\n+\t\t\t\to.par <- par(mar = rep.int(0, 4))\n+\t\t\t\ton.exit(par(o.par))\n+\t\t\t\tstop=0 #initialize\n+\t\t\t\tfor (i in 1:num.plots) {\n+\t\t\t\t\tstart=stop+1\n+\t\t\t\t\tstop=start+perpage-1 #\n+\t\t\t\t\tfor (c in start:stop){\n+\t\t\t\t\t\tif (c <=dim(mat)[2]){\n+\t\t\t\t\t\t\t\t\n+\t\t\t\t\t\t\t#get sample name\n+\t\t\t\t\t\t\tsampname<-basename(resGC$xset[[c]]@xcmsSet@filepaths)\n+\n+\t\t\t\t\t\t\t#remove .cdf, .mzXML filepattern\n+\t\t\t\t\t\t\tfilepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", \n+\t\t\t\t\t\t\t\t\t"[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")\n+\t\t\t\t\t\t\tfilepattern <- paste(paste("\\\\.", filepattern, "$", sep = ""), \n+\t\t\t\t\t\t\t\t\tcollapse = 
"|")\n+\t\t\t\t\t\t\tsampname<-gsub(filepattern, "",sampname)\n+\t\t\t\t\t\t\t \n+\t\t\t\t\t\t\ttitle1<-paste("unknown", l,"from",sampname, sep=" ")\n+\t\t\t\t\t\t\tan<-resGC$xset[[c]]\n+\t\t\t\t\t\t\t \n+\t\t\t\t\t\t\tpar (mar=c(0, 0, 0, 0) + 0.1)\n+\t\t\t\t\t\t\tplot.new()\n+\t\t\t\t\t\t\tbox()\n+\t\t\t\t\t\t\ttext(0.5, 0.5, title1, cex=2)\n+\t\t\t\t\t\t\tif (!is.na(mat[l,c])){\n+\t\t\t\t\t\t\t\tpcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[l,c]),2]\n+\t'..b'pattern, "$", sep = ""), collapse = "|")\n-                            sampname<-gsub(filepattern, "",sampname)\n-                             \n-                            title1<-paste("unknown",unkn[l],"from",sampname, sep=" ")\n-                            an<-resGC$xset[[c]]\n-                             \n-                            par (mar=c(0, 0, 0, 0) + 0.1)\n-                            plot.new()\n-                            box()\n-                            text(0.5, 0.5, title1, cex=2)\n-                            if (!is.na(mat[unkn[l],c])){\n-                                pcgrp=allPCGRPs[[c]][which(allPCGRPs[[c]][,1]==mat[unkn[l],c]),2]\n-                                if (pcgrp!=mat[unkn[l],c]) print ("pcgrp changed")\n-                                par (mar=c(3, 2.5, 3, 1.5) + 0.1)\n-                                plotEICs(an, pspec=pcgrp, maxlabel=2)\n-                                plotPsSpectrum(an, pspec=pcgrp, maxlabel=2)\n-                            } else {\n-                                plot.new()\n-                                box()\n-                                text(0.5, 0.5, "NOT FOUND", cex=2)\n-                                plot.new()\n-                                box()\n-                                text(0.5, 0.5, "NOT FOUND", cex=2)\n-                            }\n-                        }\n-                    }\n-                    # my.plots[[i]] <- recordPlot()\n-                }\n-                graphics.off()\n-\n-              
  # pdf(file=paste("Unknown_",unkn[l],".pdf", sep=""), onefile=TRUE)\n-                # for (my.plot in my.plots) {\n-                    # replayPlot(my.plot)\n-                # }\n-                # my.plots\n-                # graphics.off()\n-\n-            }#end  for unkn[l]\n-        \n-        }\n-    \n+    for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) {\n+        listArguments[[argument]]=NULL\n     }\n-} #end function \n+    return(list(zipfile=zipfile, singlefile=singlefile, listArguments=listArguments))\n+}\n \n \n+# This function retrieve the raw file in the working directory\n+#   - if zipfile: unzip the file with its directory tree\n+#   - if singlefiles: set symlink with the good filename\n+retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) {\n+    if(!is.null(singlefile) && (length("singlefile")>0)) {\n+        for (singlefile_sampleName in names(singlefile)) {\n+            singlefile_galaxyPath = singlefile[[singlefile_sampleName]]\n+            if(!file.exists(singlefile_galaxyPath)){\n+                error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")\n+                print(error_message); stop(error_message)\n+            }\n \n+            file.symlink(singlefile_galaxyPath,singlefile_sampleName)\n+        }\n+        directory = "."\n+\n+    }\n+    if(!is.null(zipfile) && (zipfile!="")) {\n+        if(!file.exists(zipfile)){\n+            error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... 
if you have one!")\n+            print(error_message)\n+            stop(error_message)\n+        }\n+\n+        #list all file in the zip file\n+        #zip_files=unzip(zipfile,list=T)[,"Name"]\n+\n+        #unzip\n+        suppressWarnings(unzip(zipfile, unzip="unzip"))\n+\n+        #get the directory name\n+        filesInZip=unzip(zipfile, list=T);\n+        directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])));\n+        directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]\n+        directory = "."\n+        if (length(directories) == 1) directory = directories\n+\n+        cat("files_root_directory\\t",directory,"\\n")\n+\n+    }\n+    return (directory)\n+}\n+\n'
b
diff -r 8e1b99a7d733 -r c75532b75ba1 metams.r
--- a/metams.r Wed May 24 07:39:18 2017 -0400
+++ b/metams.r Thu Jun 08 11:53:35 2017 -0400
[
@@ -1,7 +1,7 @@
 #!/usr/local/public/bin/Rscript --vanilla --slave --no-site-file
-# metams.r version="2.0"
+# metams.r version="2.1.1"
 #created by Yann GUITTON 
-#use RI options
+#use RI options + add try() around plotUnknowns + add sessionInfo logging
 
 #Redirect all stdout to the log file
 log_file=file("metams.log", open = "wt")
@@ -16,13 +16,24 @@
     base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
     source(paste(base_dir, fname, sep="/"))
 }
-print("step1")
+# print("step1")
 
 
 listArguments = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects
-print("new version 2.0")
+# print("new version 2.0")
+## constants
+##----------
+
+modNamC <- "metaMS:runGC" ## module name
 
 
+## log file
+##---------
+cat("\nStart of the '", modNamC, "' Galaxy module call: ",
+format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")
+
+
+cat("\n1) Parameters:\n")
 print(listArguments)
 
 
@@ -343,8 +354,13 @@
 
 #to do check if no peaks found
 #Quality controls plots but only working in R (don't know why)
-a<-plotUnknowns(resGC=resGC, unkn=unknarg) #use unknparam value
-
+a<-try(plotUnknowns(resGC=resGC, unkn=unknarg)); #use unknparam value
+if(class(a) == "try-error") {
+   pdf("Unknown_Error.pdf")
+ plot.new()
+ text(x=0.5,y=1,pos=1, labels="Error generating EICs\n please use none instead of a vector in plotUnknown")
+   dev.off()
+}
 # create a mergpdf
 
 #test
@@ -370,3 +386,27 @@
 #saving R data in .Rdata file to save the variables used in the present tool
 save.image(paste("runGC","RData",sep="."))
 
+## Closing
+##--------
+
+cat("\nEnd of '", modNamC, "' Galaxy module call: ",
+    as.character(Sys.time()), "\n", sep = "")
+
+cat("\n\n\n============================================================================")
+cat("\nAdditional information about the call:\n")
+# cat("\n1) Parameters:\n")
+# print(cbind(value = argVc))
+
+cat("\n1) Session Info:\n")
+sessioninfo <- sessionInfo()
+cat(sessioninfo$R.version$version.string,"\n")
+cat("Main packages:\n")
+for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
+cat("Other loaded packages:\n")
+for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
+
+cat("============================================================================\n")
+
+sink()
+
+rm(list = ls())
\ No newline at end of file
b
diff -r 8e1b99a7d733 -r c75532b75ba1 metams_runGC.xml
--- a/metams_runGC.xml Wed May 24 07:39:18 2017 -0400
+++ b/metams_runGC.xml Thu Jun 08 11:53:35 2017 -0400
b
@@ -1,4 +1,4 @@
- <tool id="metams_runGC" name="metaMS.runGC" version="2.1">
+ <tool id="metams_runGC" name="metaMS.runGC" version="2.1.1">
        
     <description>GC-MS data preprocessing using metaMS package</description>
     
@@ -57,7 +57,11 @@
             db "NULL"
         #end if
         nSlaves \${GALAXY_SLOTS:-1}
-        unkn "c($unkn)"
+        #if $unkn == "none":
+    unkn "none"
+ #elif $unkn != "none":
+    unkn "c($unkn)"
+ #end if
          
     </command>
     
@@ -128,7 +132,7 @@
                     <when value="hide" />
                 </conditional>
                
-                <param name="rtdiff" type="float" value="0.05" label="RT_Diff" help="The allowed RT shift between same molecule in different sample" />
+                <param name="rtdiff" type="float" value="0.05" label="RT_Diff" help="The allowed RT shift in minutes between the same molecule in different samples" />
                 <param name="minfeat" type="integer" value="5" label="Min_Features" help="The minimum number of ion in a mass spectra to consider it a molecule" />
                 <param name="simthreshold" type="float" value="0.70" label="similarity_threshold" help="The minimum similarity allowed between peaks mass spectra to be considered as equal" />
                 <param name="minclassfraction" type="float" value="0.5" label="min.class.fract" help="The fraction of samples in which a pseudospectrum is present before it is regarded as an unknown" />
@@ -158,7 +162,7 @@
                 
             </when>
       </conditional>
-         <param name="unkn" type="text" value="1:5" label="EIC_Unknown" help="vector of peaks number to be plotted, for example 1:5 (mean 1 to 5) or 1,4,12  means 1 4 and 12). For all EIC use 0" />
+         <param name="unkn" type="text" value="1:5" label="EIC_Unknown" help="Vector of peak numbers to be plotted, for example 1:5 (meaning 1 to 5) or 1,4,12 (meaning 1, 4 and 12). For all EICs use 0. Use none for no EIC plot" />
          <conditional name="options_rifilter">
  <param name="option" type="select" label="Use RI as filter" >
  <option value="false" selected="true">FALSE</option>
@@ -236,6 +240,7 @@
 Description
 -----------
 metaMS.runGC is a function dedicated to GCMS data processing from converted files to the generation of pseudospectra (compounds) table.
+Current version for metaMS R package: 1.8.0
 
 **Process:** 
 Each of the converted data (cdf, mzML...)  is profiled by a combination of xcms and CAMERA functions. Then all the mass spectra of detected peaks are compared and clustered.
@@ -364,6 +369,11 @@
 Changelog/News
 --------------
 
+**Version 2.1 - 08/06/2017**
+
+- Quality: add sessionInfo logs with packages versions
+- Processing: add RI usage 
+
 **Version 1.1 - 11/07/2016**
 
 - TEST: refactoring to pass planemo test using conda dependencies