Galaxy |

Changeset 11:91311aa08cdc (2017-01-30)

Previous changeset: 10:69eb0fc05837 (2016-07-06) | Next changeset: 12:15646e937936 (2017-04-07)

Commit message:
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 08e7f269a5c59687a7768be8db5fcb4e4d736093

modified:
README.rst
abims_xcms_xcmsSet.xml
lib.r
macros.xml
xcms.r

diff -r 69eb0fc05837 -r 91311aa08cdc README.rst
--- a/README.rst Wed Jul 06 17:42:15 2016 -0400
+++ b/README.rst Mon Jan 30 08:52:59 2017 -0500

@@ -2,6 +2,14 @@
Changelog/News
--------------

+**Version 2.0.11 - 22/12/2016**
+
+- BUGFIX: propose scanrange for all methods
+
+**Version 2.0.10 - 22/12/2016**
+
+- BUGFIX: when having only one group (i.e. one folder of raw data) the BPC and TIC pdf files do not contain any graph
+
**Version 2.0.9 - 06/07/2016**

- UPGRADE: upgrate the xcms version from 1.44.0 to 1.46.0
@@ -50,4 +58,4 @@

Planemo test using source env.sh: passed

-Planemo shed_test : passed
\ No newline at end of file
+Planemo shed_test : passed

diff -r 69eb0fc05837 -r 91311aa08cdc abims_xcms_xcmsSet.xml
--- a/abims_xcms_xcmsSet.xml Wed Jul 06 17:42:15 2016 -0400
+++ b/abims_xcms_xcmsSet.xml Mon Jan 30 08:52:59 2017 -0500

[

@@ -1,14 +1,14 @@
-<tool id="abims_xcms_xcmsSet" name="xcms.xcmsSet" version="2.0.9">
-
+<tool id="abims_xcms_xcmsSet" name="xcms.xcmsSet" version="2.0.11">
+
     <description>Filtration and Peak Identification using xcmsSet function from xcms R package to preprocess LC/MS data for relative quantification and statistical analysis </description>
-
+
     <macros>
         <import>macros.xml</import>
     </macros>

     <expand macro="requirements"/>
     <expand macro="stdio"/>
-
+
     <command><![CDATA[
         @COMMAND_XCMS_SCRIPT@
         #if $inputs.input == "lib":
@@ -24,14 +24,16 @@
         ticspdf $ticsRawPdf
         bicspdf $bpcsRawPdf

-        ## profmethod $profmethod
-        nSlaves \${GALAXY_SLOTS:-1} method $methods.method
+
+        #if $options_scanrange.option == "show":
+            scanrange "c($options_scanrange.scanrange)"
+        #end if
+
+        ## profmethod $profmethod
+        nSlaves \${GALAXY_SLOTS:-1} method $methods.method
         #if $methods.method == "centWave":
             ppm $methods.ppm
             peakwidth "c($methods.peakwidth)"
-            #if $methods.options_scanrange.option == "show":
-                scanrange "c($methods.options_scanrange.scanrange)"
-            #end if
             #if $methods.options_c.option == "show":
                 mzdiff $methods.options_c.mzdiff
                 snthresh $methods.options_c.snthresh
@@ -60,7 +62,7 @@
         #end if
         @COMMAND_LOG_EXIT@
     ]]></command>
-
+
     <inputs>

         <conditional name="inputs">
@@ -73,13 +75,27 @@
              </when>
             <when value="lib">
                 <param name="library" type="text" size="40" label="Library directory name" help="The name of your directory containing all your data" >
-                <validator type="empty_field"/>
+                <validator type="empty_field"/>
             </param>
                 </when>

         </conditional>

-
+        <conditional name="options_scanrange">
+            <param name="option" type="select" label="Scan range option " >
+                <option value="show">show</option>
+                <option value="hide" selected="true">hide</option>
+            </param>
+            <when value="show">
+                <param name="scanrange" type="text" value="" label="scanrange" help="scan range to process, for example (16,365)" >
+                    <validator type="empty_field"/>
+                </param>
+            </when>
+            <when value="hide">
+            </when>
+        </conditional>
+
+

+            
             <when value="matchedFilter">
                 <param name="step" type="float" value="0.01" label="Step size to use for profile generation" help="[step] The peak detection algorithm creates extracted ion base peak chromatograms (EIBPC) on a fixed step size" />
                 <param name="fwhm" type="integer" value="30" label="Full width at half maximum of matched filtration gaussian model peak" help="[fwhm] Only used to calculate the actual sigma" />
@@ -159,7 +162,7 @@
                 </conditional>
             </when>

-        
+            
             <when value="MSW">
                 <param name="nearbyPeak" type="select" label="Determine whether to include the nearby small peaks of major peaks" help="[nearbyPeak]" >
                     <option value="TRUE">TRUE</option>
@@ -173,7 +176,7 @@
             </when>
         </conditional>
     </inputs>
-
+
     <outputs>
         <data name="xsetRData" format="rdata.xcms.raw" label="xset.RData" />
         <data name="sampleMetadata" format="tabular" label="sampleMetadata.tsv" />
@@ -181,7 +184,7 @@
         <data name="bpcsRawPdf"   format="pdf" label="xset.BPCs_raw.pdf" />
         <data name="log" format="txt" label="xset.log.txt" />
     </outputs>
-
+
     <tests>
         <!--<test>
             <param name="inputs|input" value="zip_file" />
@@ -239,7 +242,7 @@
             </output>
         </test>
     </tests>
-
+
     <help><![CDATA[

@HELP_AUTHORS@
@@ -267,7 +270,7 @@
========================= ================= ======= =========
Name                      output file       format  parameter
========================= ================= ======= =========
-NA                        NA                zip     NA
+NA                        NA                zip     NA
========================= ================= ======= =========

@@ -291,7 +294,7 @@

------

-.. class:: infomark
+.. class:: infomark

The output file is an xset.RData file. You can continue your analysis using it in **xcms.group** tool.

@@ -371,7 +374,7 @@

**Matched Filter**

-    | One parameter to consider is the Gaussian model peak width used for matched filtration,an integral part of the peak detection algorithm.
+    | One parameter to consider is the Gaussian model peak width used for matched filtration,an integral part of the peak detection algorithm.
     | For a discussion of how model peak width affects the signal to noise ratio, see Danielsson et al. (2002).

@@ -409,10 +412,10 @@
xset.RData: rdata.xcms.raw format

     | Rdata file that is necessary in the second step of the workflow "xcms.group".
-
+
------

-.. class:: infomark
+.. class:: infomark

The output file is an xset.RData file. You can continue your analysis using it in **xcms.group** tool.

@@ -432,7 +435,7 @@

     | Method -> **matchedFilter**
     | step   -> **0.01**
-    | fwhm   -> **4**
+    | fwhm   -> **4**
     | Advanced option -> **show**
     | max: -> **50**
     | snthresh -> **1**
@@ -475,9 +478,17 @@
Changelog/News
--------------

+**Version 2.0.11 - 22/12/2016**
+
+- BUGFIX: propose scanrange for all methods
+
+**Version 2.0.10 - 22/12/2016**
+
+- BUGFIX: when having only one group (i.e. one folder of raw data) the BPC and TIC pdf files do not contain any graph
+
**Version 2.0.9 - 06/07/2016**

-- UPGRADE: upgrate the xcms version from 1.44.0 to 1.46.0
+- UPGRADE: upgrade the xcms version from 1.44.0 to 1.46.0

**Version 2.0.8 - 06/04/2016**

diff -r 69eb0fc05837 -r 91311aa08cdc lib.r
--- a/lib.r Wed Jul 06 17:42:15 2016 -0400
+++ b/lib.r Mon Jan 30 08:52:59 2017 -0500

[

@@ -1,14 +1,59 @@
-# lib.r version="2.0.1"
 #Authors ABiMS TEAM
-#Lib.r for Galaxy Workflow4Metabo
+#Lib.r for Galaxy Workflow4Metabolomics xcms tools
+#
+#version 2.4: lecorguille
+# add getPeaklistW4M
+#version 2.3: yguitton
+# correction for empty PDF when only 1 class
 #version 2.2
-#Based on lib.r 2.1
-#Modifications made by Guitton Yann 
-#correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet
-#Note if scanrange is used a warning is prompted in R console but do not stop PDF generation
+# correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet
+# Note if scanrange is used a warning is prompted in R console but do not stop PDF generation
+#version 2.1: yguitton
+# Modifications made by Guitton Yann
 
 
+#@author G. Le Corguille
+#This function convert if it is required the Retention Time in minutes
+RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
+ if (convertRTMinute){
+ #converting the retention times (seconds) into minutes
+ print("converting the retention times into minutes in the variableMetadata")
+ variableMetadata[,"rt"]=variableMetadata[,"rt"]/60
+ variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60
+ variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60
+ }
+ return (variableMetadata)
+}
 
+#@author G. Le Corguille
+#This function format ions identifiers
+formatIonIdentifiers <- function(dataData, numDigitsRT=0, numDigitsMZ=0) {
+ return(make.unique(paste0("M",round(dataData[,"mz"],numDigitsMZ),"T",round(dataData[,"rt"],numDigitsRT))))
+}
+
+#@author G. Le Corguille
+# value: intensity values to be used into, maxo or intb
+getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) {
+ groups <- xset@groups
+ values <- groupval(xset, "medret", value=intval)
+ 
+ # renamming of the column rtmed to rt to fit with camera peaklist function output
+ colnames(groups)[colnames(groups)=="rtmed"] <- "rt"
+ colnames(groups)[colnames(groups)=="mzmed"] <- "mz"
+ 
+ ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
+ groups = RTSecondToMinute(groups, convertRTMinute)
+
+ rownames(groups) = ids
+ rownames(values) = ids
+
+ #@TODO: add "name" as the first column name
+ #colnames(groups)[1] = "name"
+ #colnames(values)[1] = "name"
+
+ write.table(groups, file=variableMetadataOutput,sep="\t",quote=F,row.names = T,col.names = NA)
+ write.table(values, file=dataMatrixOutput,sep="\t",quote=F,row.names = T,col.names = NA)
+}
 
 #@author Y. Guitton
 getBPC <- function(file,rtcor=NULL, ...) {
@@ -44,13 +89,13 @@
 for (j in 1:N) {
 
 TIC[[j]] <- getBPC(files[j])
- #good for raw 
+ #good for raw
 # seems strange for corrected
 #errors if scanrange used in xcmsSetgeneration
 if (!is.null(xcmsSet) && rt == "corrected")
 rtcor <- xcmsSet@rt$corrected[[j]] else
 rtcor <- NULL
- 
+
 TIC[[j]] <- getBPC(files[j],rtcor=rtcor)
 # TIC[[j]][,1]<-rtcor
 }
@@ -68,11 +113,11 @@
 
 
 ##plot start
- 
+
 if (length(class)>2){
 for (k in 1:(length(class)-1)){
 for (l in (k+1):length(class)){
- #print(paste(class[k],"vs",class[l],sep=" ")) 
+ #print(paste(class[k],"vs",class[l],sep=" "))
 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
 colvect<-NULL
 for (j in 1:length(classnames[[k]])) {
@@ -115,6 +160,24 @@
 
 }#end length ==2
 
+ #case where only one class
+ if (length(class)==1){
+ k=1
+		ylim = range(sapply(TIC, function(x) range(x[,2])))
+ colvect<-NULL
+ plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Tim
[... diff output truncated here by the changeset viewer ...]
cmsSet@phenoData[,1]==class[i])
 }
- 
+
 N <- length(files)
 TIC <- vector("list",N)
 
@@ -178,7 +241,7 @@
 if (length(class)>2){
 for (k in 1:(length(class)-1)){
 for (l in (k+1):length(class)){
- #print(paste(class[k],"vs",class[l],sep=" ")) 
+ #print(paste(class[k],"vs",class[l],sep=" "))
 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
 colvect<-NULL
 for (j in 1:length(classnames[[k]])) {
@@ -219,6 +282,25 @@
 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
 
 }#end length ==2
+
+ #case where only one class
+ if (length(class)==1){
+	 k=1
+	 ylim = range(sapply(TIC, function(x) range(x[,2])))
+
+	 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
+ colvect<-NULL
+		for (j in 1:length(classnames[[k]])) {
+ tic <- TIC[[classnames[[k]][j]]]
+			# points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
+			points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
+ colvect<-append(colvect,cols[classnames[[k]][j]])
+	 }
+
+		legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
+
+	}#end length ==1
+
 dev.off() #pdf(pdfname,w=16,h=10)
 
 invisible(TIC)
@@ -237,7 +319,7 @@
 sampleMetadata=xset@phenoData
 sampleNamesOrigin=rownames(sampleMetadata)
 sampleNamesMakeNames=make.names(sampleNamesOrigin)
- 
+
 if (any(duplicated(sampleNamesMakeNames))) {
 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
 for (sampleName in sampleNamesOrigin) {
@@ -285,7 +367,7 @@
 
 #Set the polarity attribute
 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity
- 
+
 #Delete xcmsRaw object because it creates a bug for the fillpeaks step
 rm(xcmsRaw)
 }
@@ -321,7 +403,7 @@
 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
 
 # COMPARISON
- if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { 
+ if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
@@ -347,7 +429,7 @@
 write(capture, stderr())
 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
 }
- 
+
 }
 
 
@@ -359,7 +441,7 @@
 cat("Checking Non ASCII characters in the XML...\n")
 
 processed=F
- l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) 
+ l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE)
 for (i in l){
 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="")
 capture=suppressWarnings(system(cmd,intern=TRUE))
@@ -368,7 +450,7 @@
 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
 c=system(cmd,intern=TRUE)
 capture=""
- processed=T 
+ processed=T
 }
 }
 if (processed) cat("\n\n")
@@ -376,7 +458,7 @@
 }
 
 
-## 
+##
 ## This function will compute MD5 checksum to check the data integrity
 ##
 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
@@ -397,4 +479,3 @@
 
 return(as.matrix(md5sum(files)))
 }
-

diff -r 69eb0fc05837 -r 91311aa08cdc macros.xml
--- a/macros.xml Wed Jul 06 17:42:15 2016 -0400
+++ b/macros.xml Mon Jan 30 08:52:59 2017 -0500

@@ -2,7 +2,6 @@
<macros>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="3.1.2">R</requirement>
             <requirement type="package" version="0.4_1">r-snow</requirement>
             <requirement type="package" version="1.46.0">bioconductor-xcms</requirement>
             <requirement type="package" version="1.1_4">r-batch</requirement>
@@ -40,7 +39,7 @@
         <conditional name="zipfile_load_conditional">
             <param name="zipfile_load_select" type="select" label="Resubmit your zip file" help="Use only if you get a message which say that your original zip file have been deleted on the server." >
                 <option value="no" >no need</option>
-                <option value="yes" selected="peakgroups">yes</option>
+                <option value="yes">yes</option>
             </param>
             <when value="no">
             </when>

diff -r 69eb0fc05837 -r 91311aa08cdc xcms.r
--- a/xcms.r Wed Jul 06 17:42:15 2016 -0400
+++ b/xcms.r Mon Jan 30 08:52:59 2017 -0500

[

@@ -19,7 +19,7 @@
   cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="")
}
source_local <- function(fname){ argv <- commandArgs(trailingOnly = FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) }
-cat("\n\n");
+cat("\n\n");

@@ -64,18 +64,35 @@
   xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL
}

+#saving the specific parameters
rplotspdf = "Rplots.pdf"
if (!is.null(listArguments[["rplotspdf"]])){
   rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL
}
-
sampleMetadataOutput = "sampleMetadata.tsv"
if (!is.null(listArguments[["sampleMetadataOutput"]])){
   sampleMetadataOutput = listArguments[["sampleMetadataOutput"]]; listArguments[["sampleMetadataOutput"]]=NULL
}
-
-
-
+variableMetadataOutput = "variableMetadata.tsv"
+if (!is.null(listArguments[["variableMetadataOutput"]])){
+  variableMetadataOutput = listArguments[["variableMetadataOutput"]]; listArguments[["variableMetadataOutput"]]=NULL
+}
+dataMatrixOutput = "dataMatrix.tsv"
+if (!is.null(listArguments[["dataMatrixOutput"]])){
+  dataMatrixOutput = listArguments[["dataMatrixOutput"]]; listArguments[["dataMatrixOutput"]]=NULL
+}
+if (!is.null(listArguments[["convertRTMinute"]])){
+  convertRTMinute = listArguments[["convertRTMinute"]]; listArguments[["convertRTMinute"]]=NULL
+}
+if (!is.null(listArguments[["numDigitsMZ"]])){
+  numDigitsMZ = listArguments[["numDigitsMZ"]]; listArguments[["numDigitsMZ"]]=NULL
+}
+if (!is.null(listArguments[["numDigitsRT"]])){
+  numDigitsRT = listArguments[["numDigitsRT"]]; listArguments[["numDigitsRT"]]=NULL
+}
+if (!is.null(listArguments[["intval"]])){
+  intval = listArguments[["intval"]]; listArguments[["intval"]]=NULL
+}

if (thefunction %in% c("xcmsSet","retcor")) {
   ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL
@@ -116,15 +133,15 @@
     suppressWarnings(unzip(zipfile, unzip="unzip"))

     #get the directory name
-    filesInZip=unzip(zipfile, list=T);
+    filesInZip=unzip(zipfile, list=T);
     directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])));
     directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
     directory = "."
     if (length(directories) == 1) directory = directories
-
+
     cat("files_root_directory\t",directory,"\n")

-    #
+    #
     md5sumList=list("origin"=getMd5sum(directory))

     # Check and fix if there are non ASCII characters. If so, they will be removed from the *mzXML mzML files.
@@ -187,6 +204,8 @@

#execution of the function "thefunction" with the parameters given in "listArguments"
+
+cat("\t\tCOMPUTE\n")
xset = do.call(thefunction, listArguments)

@@ -200,7 +219,7 @@
   xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths)

   if(exists("zipfile") && (zipfile!="")) {
-
+
     #Modify the samples names (erase the path)
     for(i in 1:length(sampnames(xset))){

@@ -217,17 +236,24 @@

# -- TIC --
if (thefunction == "xcmsSet") {
+  cat("\t\tGET TIC GRAPH\n")
   sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput)
   getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw")
   getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf)
} else if (thefunction == "retcor") {
+  cat("\t\tGET TIC GRAPH\n")
   getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected")
   getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf)
}

+if (thefunction == "fillPeaks") {
+  cat("\t\tGET THE PEAK LIST\n")
+  getPeaklistW4M(xset,intval,convertRTMinute,numDigitsMZ,numDigitsRT,variableMetadataOutput,dataMatrixOutput)
+}
+
+
cat("\n\n")

-
# ----- EXPORT -----

cat("\tXSET OBJECT INFO\n")
@@ -243,4 +269,3 @@

cat("\tDONE\n")
-