Mercurial > repos > lecorguille > xcms_group
comparison lib.r @ 14:833d2c821d9c draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 9f72e947d9c241d11221cad561f3525d27231857
author | lecorguille |
---|---|
date | Tue, 18 Sep 2018 16:11:06 -0400 |
parents | c81275aee959 |
children | 6a4d8753a330 |
comparison
equal
deleted
inserted
replaced
13:13558e8a4778 | 14:833d2c821d9c |
---|---|
1 #Authors ABiMS TEAM | 1 #@authors ABiMS TEAM, Y. Guitton |
2 #Lib.r for Galaxy Workflow4Metabolomics xcms tools | 2 # lib.r for Galaxy Workflow4Metabolomics xcms tools |
3 # | 3 |
4 #version 2.4: lecorguille | 4 #@author G. Le Corguille |
5 # add getPeaklistW4M | 5 # solve an issue with batch if arguments are logical TRUE/FALSE |
6 #version 2.3: yguitton | 6 parseCommandArgs <- function(...) { |
7 # correction for empty PDF when only 1 class | 7 args <- batch::parseCommandArgs(...) |
8 #version 2.2 | 8 for (key in names(args)) { |
9 # correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet | 9 if (args[key] %in% c("TRUE","FALSE")) |
10 # Note if scanrange is used a warning is prompted in R console but do not stop PDF generation | 10 args[key] = as.logical(args[key]) |
11 #version 2.1: yguitton | 11 } |
12 # Modifications made by Guitton Yann | 12 return(args) |
13 | 13 } |
14 | 14 |
15 #@author G. Le Corguille | 15 #@author G. Le Corguille |
16 #This function convert if it is required the Retention Time in minutes | 16 # This function will |
17 # - load the packages | |
18 # - display the sessionInfo | |
19 loadAndDisplayPackages <- function(pkgs) { | |
20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) | |
21 | |
22 sessioninfo = sessionInfo() | |
23 cat(sessioninfo$R.version$version.string,"\n") | |
24 cat("Main packages:\n") | |
25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
26 cat("Other loaded packages:\n") | |
27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
28 } | |
29 | |
30 #@author G. Le Corguille | |
31 # This function merge several chromBPI or chromTIC into one. | |
32 mergeChrom <- function(chrom_merged, chrom) { | |
33 if (is.null(chrom_merged)) return(NULL) | |
34 chrom_merged@.Data <- cbind(chrom_merged@.Data, chrom@.Data) | |
35 return(chrom_merged) | |
36 } | |
37 | |
38 #@author G. Le Corguille | |
39 # This function merge several xdata into one. | |
40 mergeXData <- function(args) { | |
41 chromTIC <- NULL | |
42 chromBPI <- NULL | |
43 chromTIC_adjusted <- NULL | |
44 chromBPI_adjusted <- NULL | |
45 for(image in args$images) { | |
46 | |
47 load(image) | |
48 # Handle infiles | |
49 if (!exists("singlefile")) singlefile <- NULL | |
50 if (!exists("zipfile")) zipfile <- NULL | |
51 rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) | |
52 zipfile <- rawFilePath$zipfile | |
53 singlefile <- rawFilePath$singlefile | |
54 retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) | |
55 | |
56 if (exists("raw_data")) xdata <- raw_data | |
57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") | |
58 | |
59 cat(sampleNamesList$sampleNamesOrigin,"\n") | |
60 | |
61 if (!exists("xdata_merged")) { | |
62 xdata_merged <- xdata | |
63 singlefile_merged <- singlefile | |
64 md5sumList_merged <- md5sumList | |
65 sampleNamesList_merged <- sampleNamesList | |
66 chromTIC_merged <- chromTIC | |
67 chromBPI_merged <- chromBPI | |
68 chromTIC_adjusted_merged <- chromTIC_adjusted | |
69 chromBPI_adjusted_merged <- chromBPI_adjusted | |
70 } else { | |
71 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata) | |
72 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- .concatenate_OnDiskMSnExp(xdata_merged,xdata) | |
73 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") | |
74 | |
75 singlefile_merged <- c(singlefile_merged,singlefile) | |
76 md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin) | |
77 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin) | |
78 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames) | |
79 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC) | |
80 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI) | |
81 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted) | |
82 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted) | |
83 } | |
84 } | |
85 rm(image) | |
86 xdata <- xdata_merged; rm(xdata_merged) | |
87 singlefile <- singlefile_merged; rm(singlefile_merged) | |
88 md5sumList <- md5sumList_merged; rm(md5sumList_merged) | |
89 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) | |
90 | |
91 if (!is.null(args$sampleMetadata)) { | |
92 cat("\tXSET PHENODATA SETTING...\n") | |
93 sampleMetadataFile <- args$sampleMetadata | |
94 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F) | |
95 xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)] | |
96 | |
97 if (any(is.na(pData(xdata)$sample_group))) { | |
98 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] | |
99 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" ")) | |
100 print(error_message) | |
101 stop(error_message) | |
102 } | |
103 } | |
104 | |
105 if (!is.null(chromTIC_merged)) { chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData } | |
106 if (!is.null(chromBPI_merged)) { chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData } | |
107 if (!is.null(chromTIC_adjusted_merged)) { chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData } | |
108 if (!is.null(chromBPI_adjusted_merged)) { chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData } | |
109 | |
110 return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList, "chromTIC"=chromTIC, "chromBPI"=chromBPI, "chromTIC_adjusted"=chromTIC_adjusted, "chromBPI_adjusted"=chromBPI_adjusted)) | |
111 } | |
112 | |
113 #@author G. Le Corguille | |
114 # This function convert if it is required the Retention Time in minutes | |
17 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { | 115 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { |
18 if (convertRTMinute){ | 116 if (convertRTMinute){ |
19 #converting the retention times (seconds) into minutes | 117 #converting the retention times (seconds) into minutes |
20 print("converting the retention times into minutes in the variableMetadata") | 118 print("converting the retention times into minutes in the variableMetadata") |
21 variableMetadata[,"rt"]=variableMetadata[,"rt"]/60 | 119 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60 |
22 variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60 | 120 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60 |
23 variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60 | 121 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60 |
24 } | 122 } |
25 return (variableMetadata) | 123 return (variableMetadata) |
26 } | 124 } |
27 | 125 |
28 #@author G. Le Corguille | 126 #@author G. Le Corguille |
29 #This function format ions identifiers | 127 # This function format ions identifiers |
30 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { | 128 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { |
31 splitDeco = strsplit(as.character(variableMetadata$name),"_") | 129 splitDeco <- strsplit(as.character(variableMetadata$name),"_") |
32 idsDeco = sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) | 130 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) |
33 namecustom = make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) | 131 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) |
34 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) | 132 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) |
35 return(variableMetadata) | 133 return(variableMetadata) |
36 } | 134 } |
37 | 135 |
38 #@author G. Le Corguille | 136 #@author G. Le Corguille |
137 # This function convert the remain NA to 0 in the dataMatrix | |
138 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) { | |
139 if (naTOzero){ | |
140 dataMatrix[is.na(dataMatrix)] <- 0 | |
141 } | |
142 return (dataMatrix) | |
143 } | |
144 | |
145 #@author G. Le Corguille | |
146 # Draw the plotChromPeakDensity 3 per page in a pdf file | |
147 getPlotChromPeakDensity <- function(xdata, mzdigit=4) { | |
148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) | |
149 | |
150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) | |
151 | |
152 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | |
153 names(group_colors) <- unique(xdata$sample_group) | |
154 | |
155 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) | |
156 for (i in 1:nrow(featureDefinitions(xdata))) { | |
157 mzmin = featureDefinitions(xdata)[i,]$mzmin | |
158 mzmax = featureDefinitions(xdata)[i,]$mzmax | |
159 plotChromPeakDensity(xdata, mz=c(mzmin,mzmax), col=group_colors, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit))) | |
160 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | |
161 } | |
162 | |
163 dev.off() | |
164 } | |
165 | |
166 #@author G. Le Corguille | |
167 # Draw the plotChromPeakDensity 3 per page in a pdf file | |
168 getPlotAdjustedRtime <- function(xdata) { | |
169 | |
170 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) | |
171 | |
172 # Color by group | |
173 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | |
174 if (length(group_colors) > 1) { | |
175 names(group_colors) <- unique(xdata$sample_group) | |
176 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) | |
177 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | |
178 } | |
179 | |
180 # Color by sample | |
181 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) | |
182 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | |
183 | |
184 dev.off() | |
185 } | |
186 | |
187 #@author G. Le Corguille | |
39 # value: intensity values to be used into, maxo or intb | 188 # value: intensity values to be used into, maxo or intb |
40 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { | 189 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, naTOzero=T, variableMetadataOutput, dataMatrixOutput) { |
41 variableMetadata_dataMatrix = peakTable(xset, method="medret", value=intval) | 190 dataMatrix <- featureValues(xdata, method="medret", value=intval) |
42 variableMetadata_dataMatrix = cbind(name=groupnames(xset),variableMetadata_dataMatrix) | 191 colnames(dataMatrix) <- tools::file_path_sans_ext(colnames(dataMatrix)) |
43 | 192 dataMatrix = cbind(name=groupnamesW4M(xdata), dataMatrix) |
44 dataMatrix = variableMetadata_dataMatrix[,(make.names(colnames(variableMetadata_dataMatrix)) %in% c("name", make.names(sampnames(xset))))] | 193 variableMetadata <- featureDefinitions(xdata) |
45 | 194 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt" |
46 variableMetadata = variableMetadata_dataMatrix[,!(make.names(colnames(variableMetadata_dataMatrix)) %in% c(make.names(sampnames(xset))))] | 195 variableMetadata = data.frame(name=groupnamesW4M(xdata), variableMetadata) |
47 variableMetadata = RTSecondToMinute(variableMetadata, convertRTMinute) | 196 |
48 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | 197 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) |
198 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | |
199 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero) | |
49 | 200 |
50 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) | 201 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) |
51 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) | 202 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) |
52 } | 203 |
53 | 204 } |
54 #@author Y. Guitton | 205 |
55 getBPC <- function(file,rtcor=NULL, ...) { | 206 #@author G. Le Corguille |
56 object <- xcmsRaw(file) | 207 # It allow different of field separators |
57 sel <- profRange(object, ...) | 208 getDataFrameFromFile <- function(filename, header=T) { |
58 cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) | 209 myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F) |
59 #plotChrom(xcmsRaw(file), base=T) | 210 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F) |
60 } | 211 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F) |
61 | 212 if (ncol(myDataFrame) < 2) { |
62 #@author Y. Guitton | 213 error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" |
63 getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { | 214 print(error_message) |
64 cat("Creating BIC pdf...\n") | 215 stop(error_message) |
65 | 216 } |
66 if (is.null(xcmsSet)) { | 217 return(myDataFrame) |
67 cat("Enter an xcmsSet \n") | 218 } |
68 stop() | 219 |
69 } else { | 220 #@author G. Le Corguille |
70 files <- filepaths(xcmsSet) | 221 # Draw the BPI and TIC graphics |
71 } | 222 # colored by sample names or class names |
72 | 223 getPlotChromatogram <- function(chrom, xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { |
73 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class | 224 |
74 | 225 if (aggregationFun == "sum") |
75 classnames<-vector("list",length(phenoDataClass)) | 226 type="Total Ion Chromatograms" |
76 for (i in 1:length(phenoDataClass)){ | 227 else |
77 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) | 228 type="Base Peak Intensity Chromatograms" |
78 } | 229 |
79 | 230 adjusted="Raw" |
80 N <- dim(phenoData(xcmsSet))[1] | 231 if (hasAdjustedRtime(xdata)) |
81 | 232 adjusted="Adjusted" |
82 TIC <- vector("list",N) | 233 |
83 | 234 main <- paste(type,":",adjusted,"data") |
84 | 235 |
85 for (j in 1:N) { | 236 pdf(pdfname, width=16, height=10) |
86 | 237 |
87 TIC[[j]] <- getBPC(files[j]) | 238 # Color by group |
88 #good for raw | 239 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] |
89 # seems strange for corrected | 240 if (length(group_colors) > 1) { |
90 #errors if scanrange used in xcmsSetgeneration | 241 names(group_colors) <- unique(xdata$sample_group) |
91 if (!is.null(xcmsSet) && rt == "corrected") | 242 plot(chrom, col = group_colors[chrom$sample_group], main=main) |
92 rtcor <- xcmsSet@rt$corrected[[j]] | 243 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) |
93 else | 244 } |
94 rtcor <- NULL | 245 |
95 | 246 # Color by sample |
96 TIC[[j]] <- getBPC(files[j],rtcor=rtcor) | 247 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main) |
97 # TIC[[j]][,1]<-rtcor | 248 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) |
98 } | 249 |
99 | 250 dev.off() |
100 | 251 } |
101 | 252 |
102 pdf(pdfname,w=16,h=10) | 253 |
103 cols <- rainbow(N) | 254 # Get the polarities from all the samples of a condition |
104 lty = 1:N | |
105 pch = 1:N | |
106 #search for max x and max y in BPCs | |
107 xlim = range(sapply(TIC, function(x) range(x[,1]))) | |
108 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
109 ylim = c(-ylim[2], ylim[2]) | |
110 | |
111 | |
112 ##plot start | |
113 | |
114 if (length(phenoDataClass)>2){ | |
115 for (k in 1:(length(phenoDataClass)-1)){ | |
116 for (l in (k+1):length(phenoDataClass)){ | |
117 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) | |
118 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | |
119 colvect<-NULL | |
120 for (j in 1:length(classnames[[k]])) { | |
121 tic <- TIC[[classnames[[k]][j]]] | |
122 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
123 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
124 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
125 } | |
126 for (j in 1:length(classnames[[l]])) { | |
127 # i=class2names[j] | |
128 tic <- TIC[[classnames[[l]][j]]] | |
129 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | |
130 colvect<-append(colvect,cols[classnames[[l]][j]]) | |
131 } | |
132 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | |
133 } | |
134 } | |
135 }#end if length >2 | |
136 | |
137 if (length(phenoDataClass)==2){ | |
138 k=1 | |
139 l=2 | |
140 colvect<-NULL | |
141 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | |
142 | |
143 for (j in 1:length(classnames[[k]])) { | |
144 | |
145 tic <- TIC[[classnames[[k]][j]]] | |
146 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
147 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
148 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
149 } | |
150 for (j in 1:length(classnames[[l]])) { | |
151 # i=class2names[j] | |
152 tic <- TIC[[classnames[[l]][j]]] | |
153 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | |
154 colvect<-append(colvect,cols[classnames[[l]][j]]) | |
155 } | |
156 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | |
157 | |
158 }#end length ==2 | |
159 | |
160 #case where only one class | |
161 if (length(phenoDataClass)==1){ | |
162 k=1 | |
163 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
164 colvect<-NULL | |
165 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | |
166 | |
167 for (j in 1:length(classnames[[k]])) { | |
168 tic <- TIC[[classnames[[k]][j]]] | |
169 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
170 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
171 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
172 } | |
173 | |
174 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) | |
175 | |
176 }#end length ==1 | |
177 | |
178 dev.off() #pdf(pdfname,w=16,h=10) | |
179 | |
180 invisible(TIC) | |
181 } | |
182 | |
183 | |
184 | |
185 #@author Y. Guitton | |
186 getTIC <- function(file,rtcor=NULL) { | |
187 object <- xcmsRaw(file) | |
188 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) | |
189 } | |
190 | |
191 ## | |
192 ## overlay TIC from all files in current folder or from xcmsSet, create pdf | |
193 ## | |
194 #@author Y. Guitton | |
195 getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) { | |
196 cat("Creating TIC pdf...\n") | |
197 | |
198 if (is.null(xcmsSet)) { | |
199 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
200 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") | |
201 if (is.null(files)) | |
202 files <- getwd() | |
203 info <- file.info(files) | |
204 listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) | |
205 files <- c(files[!info$isdir], listed) | |
206 } else { | |
207 files <- filepaths(xcmsSet) | |
208 } | |
209 | |
210 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class | |
211 classnames<-vector("list",length(phenoDataClass)) | |
212 for (i in 1:length(phenoDataClass)){ | |
213 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) | |
214 } | |
215 | |
216 N <- length(files) | |
217 TIC <- vector("list",N) | |
218 | |
219 for (i in 1:N) { | |
220 if (!is.null(xcmsSet) && rt == "corrected") | |
221 rtcor <- xcmsSet@rt$corrected[[i]] else | |
222 rtcor <- NULL | |
223 TIC[[i]] <- getTIC(files[i],rtcor=rtcor) | |
224 } | |
225 | |
226 pdf(pdfname,w=16,h=10) | |
227 cols <- rainbow(N) | |
228 lty = 1:N | |
229 pch = 1:N | |
230 #search for max x and max y in TICs | |
231 xlim = range(sapply(TIC, function(x) range(x[,1]))) | |
232 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
233 ylim = c(-ylim[2], ylim[2]) | |
234 | |
235 | |
236 ##plot start | |
237 if (length(phenoDataClass)>2){ | |
238 for (k in 1:(length(phenoDataClass)-1)){ | |
239 for (l in (k+1):length(phenoDataClass)){ | |
240 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) | |
241 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | |
242 colvect<-NULL | |
243 for (j in 1:length(classnames[[k]])) { | |
244 tic <- TIC[[classnames[[k]][j]]] | |
245 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
246 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
247 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
248 } | |
249 for (j in 1:length(classnames[[l]])) { | |
250 # i=class2names[j] | |
251 tic <- TIC[[classnames[[l]][j]]] | |
252 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | |
253 colvect<-append(colvect,cols[classnames[[l]][j]]) | |
254 } | |
255 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | |
256 } | |
257 } | |
258 }#end if length >2 | |
259 if (length(phenoDataClass)==2){ | |
260 k=1 | |
261 l=2 | |
262 | |
263 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | |
264 colvect<-NULL | |
265 for (j in 1:length(classnames[[k]])) { | |
266 tic <- TIC[[classnames[[k]][j]]] | |
267 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
268 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
269 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
270 } | |
271 for (j in 1:length(classnames[[l]])) { | |
272 # i=class2names[j] | |
273 tic <- TIC[[classnames[[l]][j]]] | |
274 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | |
275 colvect<-append(colvect,cols[classnames[[l]][j]]) | |
276 } | |
277 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | |
278 | |
279 }#end length ==2 | |
280 | |
281 #case where only one class | |
282 if (length(phenoDataClass)==1){ | |
283 k=1 | |
284 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
285 | |
286 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | |
287 colvect<-NULL | |
288 for (j in 1:length(classnames[[k]])) { | |
289 tic <- TIC[[classnames[[k]][j]]] | |
290 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
291 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
292 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
293 } | |
294 | |
295 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) | |
296 | |
297 }#end length ==1 | |
298 | |
299 dev.off() #pdf(pdfname,w=16,h=10) | |
300 | |
301 invisible(TIC) | |
302 } | |
303 | |
304 | |
305 | |
306 ## | |
307 ## Get the polarities from all the samples of a condition | |
308 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 255 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
309 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | 256 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM |
310 getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { | 257 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") { |
311 cat("Creating the sampleMetadata file...\n") | 258 cat("Creating the sampleMetadata file...\n") |
312 | 259 |
313 #Create the sampleMetada dataframe | 260 #Create the sampleMetada dataframe |
314 sampleMetadata=xset@phenoData | 261 sampleMetadata <- xdata@phenoData@data |
315 sampleNamesOrigin=rownames(sampleMetadata) | 262 rownames(sampleMetadata) <- NULL |
316 sampleNamesMakeNames=make.names(sampleNamesOrigin) | 263 colnames(sampleMetadata) <- c("sampleMetadata", "class") |
264 | |
265 sampleNamesOrigin <- sampleMetadata$sampleMetadata | |
266 sampleNamesMakeNames <- make.names(sampleNamesOrigin) | |
317 | 267 |
318 if (any(duplicated(sampleNamesMakeNames))) { | 268 if (any(duplicated(sampleNamesMakeNames))) { |
319 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) | 269 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) |
320 for (sampleName in sampleNamesOrigin) { | 270 for (sampleName in sampleNamesOrigin) { |
321 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) | 271 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) |
328 for (sampleName in sampleNamesOrigin) { | 278 for (sampleName in sampleNamesOrigin) { |
329 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) | 279 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) |
330 } | 280 } |
331 } | 281 } |
332 | 282 |
333 sampleMetadata$sampleMetadata=sampleNamesMakeNames | 283 sampleMetadata$sampleMetadata <- sampleNamesMakeNames |
334 sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns | 284 |
335 rownames(sampleMetadata)=NULL | 285 |
336 | |
337 #Create a list of files name in the current directory | |
338 list_files=xset@filepaths | |
339 #For each sample file, the following actions are done | 286 #For each sample file, the following actions are done |
340 for (file in list_files){ | 287 for (fileIdx in 1:length(fileNames(xdata))) { |
341 #Check if the file is in the CDF format | 288 #Check if the file is in the CDF format |
342 if (!mzR:::netCDFIsFile(file)){ | 289 if (!mzR:::netCDFIsFile(fileNames(xdata))) { |
343 | 290 |
344 # If the column isn't exist, with add one filled with NA | 291 # If the column isn't exist, with add one filled with NA |
345 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA | 292 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA |
346 | 293 |
347 #Create a simple xcmsRaw object for each sample | |
348 xcmsRaw=xcmsRaw(file) | |
349 #Extract the polarity (a list of polarities) | 294 #Extract the polarity (a list of polarities) |
350 polarity=xcmsRaw@polarity | 295 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"] |
351 #Verify if all the scans have the same polarity | 296 #Verify if all the scans have the same polarity |
352 uniq_list=unique(polarity) | 297 uniq_list <- unique(polarity) |
353 if (length(uniq_list)>1){ | 298 if (length(uniq_list)>1){ |
354 polarity="mixed" | 299 polarity <- "mixed" |
355 } else { | 300 } else { |
356 polarity=as.character(uniq_list) | 301 polarity <- as.character(uniq_list) |
357 } | 302 } |
358 #Transforms the character to obtain only the sample name | |
359 filename=basename(file) | |
360 library(tools) | |
361 samplename=file_path_sans_ext(filename) | |
362 | 303 |
363 #Set the polarity attribute | 304 #Set the polarity attribute |
364 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity | 305 sampleMetadata$polarity[fileIdx] <- polarity |
365 | |
366 #Delete xcmsRaw object because it creates a bug for the fillpeaks step | |
367 rm(xcmsRaw) | |
368 } | 306 } |
369 | 307 |
370 } | 308 } |
371 | 309 |
372 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) | 310 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) |
373 | 311 |
374 return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) | 312 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) |
375 | 313 |
376 } | 314 } |
377 | 315 |
378 | 316 |
379 ## | 317 # This function check if xcms will found all the files |
380 ## This function check if xcms will found all the files | |
381 ## | |
382 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | 318 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM |
383 checkFilesCompatibilityWithXcms <- function(directory) { | 319 checkFilesCompatibilityWithXcms <- function(directory) { |
384 cat("Checking files filenames compatibilities with xmcs...\n") | 320 cat("Checking files filenames compatibilities with xmcs...\n") |
385 # WHAT XCMS WILL FIND | 321 # WHAT XCMS WILL FIND |
386 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | 322 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
387 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") | 323 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") |
388 info <- file.info(directory) | 324 info <- file.info(directory) |
389 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) | 325 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) |
390 files <- c(directory[!info$isdir], listed) | 326 files <- c(directory[!info$isdir], listed) |
391 files_abs <- file.path(getwd(), files) | 327 files_abs <- file.path(getwd(), files) |
392 exists <- file.exists(files_abs) | 328 exists <- file.exists(files_abs) |
393 files[exists] <- files_abs[exists] | 329 files[exists] <- files_abs[exists] |
394 files[exists] <- sub("//","/",files[exists]) | 330 files[exists] <- sub("//","/",files[exists]) |
395 | 331 |
396 # WHAT IS ON THE FILESYSTEM | 332 # WHAT IS ON THE FILESYSTEM |
397 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) | 333 filesystem_filepaths <- system(paste0("find \"$PWD/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T) |
398 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] | 334 filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] |
399 | 335 |
400 # COMPARISON | 336 # COMPARISON |
401 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { | 337 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { |
402 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) | 338 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) |
403 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) | 339 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) |
404 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | 340 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") |
405 } | 341 } |
406 } | 342 } |
407 | 343 |
408 | 344 |
409 | 345 #This function list the compatible files within the directory as xcms did |
410 ## | 346 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM |
411 ## This function check if XML contains special caracters. It also checks integrity and completness. | 347 getMSFiles <- function (directory) { |
412 ## | 348 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
349 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
350 info <- file.info(directory) | |
351 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) | |
352 files <- c(directory[!info$isdir], listed) | |
353 exists <- file.exists(files) | |
354 files <- files[exists] | |
355 return(files) | |
356 } | |
357 | |
358 # This function check if XML contains special caracters. It also checks integrity and completness. | |
413 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 359 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
414 checkXmlStructure <- function (directory) { | 360 checkXmlStructure <- function (directory) { |
415 cat("Checking XML structure...\n") | 361 cat("Checking XML structure...\n") |
416 | 362 |
417 cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") | 363 cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") |
418 capture=system(cmd,intern=TRUE) | 364 capture <- system(cmd, intern=TRUE) |
419 | 365 |
420 if (length(capture)>0){ | 366 if (length(capture)>0){ |
421 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) | 367 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) |
422 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) | 368 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) |
423 write(capture, stderr()) | 369 write(capture, stderr()) |
425 } | 371 } |
426 | 372 |
427 } | 373 } |
428 | 374 |
429 | 375 |
430 ## | 376 # This function check if XML contain special characters |
431 ## This function check if XML contain special characters | |
432 ## | |
433 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 377 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
434 deleteXmlBadCharacters<- function (directory) { | 378 deleteXmlBadCharacters<- function (directory) { |
435 cat("Checking Non ASCII characters in the XML...\n") | 379 cat("Checking Non ASCII characters in the XML...\n") |
436 | 380 |
437 processed=F | 381 processed <- F |
438 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) | 382 l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE) |
439 for (i in l){ | 383 for (i in l){ |
440 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") | 384 cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="") |
441 capture=suppressWarnings(system(cmd,intern=TRUE)) | 385 capture <- suppressWarnings(system(cmd, intern=TRUE)) |
442 if (length(capture)>0){ | 386 if (length(capture)>0){ |
443 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) | 387 cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) |
444 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) | 388 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) |
445 c=system(cmd,intern=TRUE) | 389 c <- system(cmd, intern=TRUE) |
446 capture="" | 390 capture <- "" |
447 processed=T | 391 processed <- T |
448 } | 392 } |
449 } | 393 } |
450 if (processed) cat("\n\n") | 394 if (processed) cat("\n\n") |
451 return(processed) | 395 return(processed) |
452 } | 396 } |
453 | 397 |
454 | 398 |
455 ## | 399 # This function will compute MD5 checksum to check the data integrity |
456 ## This function will compute MD5 checksum to check the data integrity | |
457 ## | |
458 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 400 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
459 getMd5sum <- function (directory) { | 401 getMd5sum <- function (directory) { |
460 cat("Compute md5 checksum...\n") | 402 cat("Compute md5 checksum...\n") |
461 # WHAT XCMS WILL FIND | 403 # WHAT XCMS WILL FIND |
462 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | 404 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
463 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") | 405 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") |
464 info <- file.info(directory) | 406 info <- file.info(directory) |
465 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) | 407 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) |
466 files <- c(directory[!info$isdir], listed) | 408 files <- c(directory[!info$isdir], listed) |
467 exists <- file.exists(files) | 409 exists <- file.exists(files) |
468 files <- files[exists] | 410 files <- files[exists] |
469 | 411 |
470 library(tools) | 412 library(tools) |
474 return(as.matrix(md5sum(files))) | 416 return(as.matrix(md5sum(files))) |
475 } | 417 } |
476 | 418 |
477 | 419 |
478 # This function get the raw file path from the arguments | 420 # This function get the raw file path from the arguments |
479 getRawfilePathFromArguments <- function(singlefile, zipfile, listArguments) { | 421 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
480 if (!is.null(listArguments[["zipfile"]])) zipfile = listArguments[["zipfile"]] | 422 getRawfilePathFromArguments <- function(singlefile, zipfile, args, prefix="") { |
481 if (!is.null(listArguments[["zipfilePositive"]])) zipfile = listArguments[["zipfilePositive"]] | 423 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'") |
482 if (!is.null(listArguments[["zipfileNegative"]])) zipfile = listArguments[["zipfileNegative"]] | 424 |
483 | 425 if (!is.null(args[[paste0("zipfile",prefix)]])) zipfile <- args[[paste0("zipfile",prefix)]] |
484 if (!is.null(listArguments[["singlefile_galaxyPath"]])) { | 426 |
485 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPath"]]; | 427 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) { |
486 singlefile_sampleNames = listArguments[["singlefile_sampleName"]] | 428 singlefile_galaxyPaths <- args[[paste0("singlefile_galaxyPath",prefix)]] |
487 } | 429 singlefile_sampleNames <- args[[paste0("singlefile_sampleName",prefix)]] |
488 if (!is.null(listArguments[["singlefile_galaxyPathPositive"]])) { | 430 } |
489 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathPositive"]]; | 431 if (exists("singlefile_galaxyPaths")){ |
490 singlefile_sampleNames = listArguments[["singlefile_sampleNamePositive"]] | 432 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|")) |
491 } | 433 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|")) |
492 if (!is.null(listArguments[["singlefile_galaxyPathNegative"]])) { | 434 |
493 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathNegative"]]; | 435 singlefile <- NULL |
494 singlefile_sampleNames = listArguments[["singlefile_sampleNameNegative"]] | 436 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { |
495 } | 437 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] |
496 if (exists("singlefile_galaxyPaths")){ | 438 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] |
497 singlefile_galaxyPaths = unlist(strsplit(singlefile_galaxyPaths,",")) | 439 # In case, an url is used to import data within Galaxy |
498 singlefile_sampleNames = unlist(strsplit(singlefile_sampleNames,",")) | 440 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1) |
499 | 441 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath |
500 singlefile=NULL | 442 } |
501 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | 443 } |
502 singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i] | 444 return(list(zipfile=zipfile, singlefile=singlefile)) |
503 singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i] | 445 } |
504 singlefile[[singlefile_sampleName]] = singlefile_galaxyPath | |
505 } | |
506 } | |
507 for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) { | |
508 listArguments[[argument]]=NULL | |
509 } | |
510 return(list(zipfile=zipfile, singlefile=singlefile, listArguments=listArguments)) | |
511 } | |
512 | |
513 | 446 |
514 # This function retrieve the raw file in the working directory | 447 # This function retrieve the raw file in the working directory |
515 # - if zipfile: unzip the file with its directory tree | 448 # - if zipfile: unzip the file with its directory tree |
516 # - if singlefiles: set symlink with the good filename | 449 # - if singlefiles: set symlink with the good filename |
450 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
517 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { | 451 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { |
518 if(!is.null(singlefile) && (length("singlefile")>0)) { | 452 if(!is.null(singlefile) && (length("singlefile")>0)) { |
519 for (singlefile_sampleName in names(singlefile)) { | 453 for (singlefile_sampleName in names(singlefile)) { |
520 singlefile_galaxyPath = singlefile[[singlefile_sampleName]] | 454 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] |
521 if(!file.exists(singlefile_galaxyPath)){ | 455 if(!file.exists(singlefile_galaxyPath)){ |
522 error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | 456 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") |
523 print(error_message); stop(error_message) | 457 print(error_message); stop(error_message) |
524 } | 458 } |
525 | 459 |
526 file.symlink(singlefile_galaxyPath,singlefile_sampleName) | 460 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) |
527 } | 461 file.copy(singlefile_galaxyPath, singlefile_sampleName) |
528 directory = "." | 462 |
529 | 463 } |
530 } | 464 directory <- "." |
531 if(!is.null(zipfile) && (zipfile!="")) { | 465 |
466 } | |
467 if(!is.null(zipfile) && (zipfile != "")) { | |
532 if(!file.exists(zipfile)){ | 468 if(!file.exists(zipfile)){ |
533 error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | 469 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") |
534 print(error_message) | 470 print(error_message) |
535 stop(error_message) | 471 stop(error_message) |
536 } | 472 } |
537 | 473 |
538 #list all file in the zip file | 474 #list all file in the zip file |
539 #zip_files=unzip(zipfile,list=T)[,"Name"] | 475 #zip_files <- unzip(zipfile,list=T)[,"Name"] |
540 | 476 |
541 #unzip | 477 #unzip |
542 suppressWarnings(unzip(zipfile, unzip="unzip")) | 478 suppressWarnings(unzip(zipfile, unzip="unzip")) |
543 | 479 |
544 #get the directory name | 480 #get the directory name |
545 filesInZip=unzip(zipfile, list=T); | 481 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) |
546 directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); | 482 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) |
547 directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | 483 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] |
548 directory = "." | 484 directory <- "." |
549 if (length(directories) == 1) directory = directories | 485 if (length(directories) == 1) directory <- directories |
550 | 486 |
551 cat("files_root_directory\t",directory,"\n") | 487 cat("files_root_directory\t",directory,"\n") |
552 | 488 |
553 } | 489 } |
554 return (directory) | 490 return (directory) |
555 } | 491 } |
492 | |
493 | |
494 # This function retrieve a xset like object | |
495 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
496 getxcmsSetObject <- function(xobject) { | |
497 # XCMS 1.x | |
498 if (class(xobject) == "xcmsSet") | |
499 return (xobject) | |
500 # XCMS 3.x | |
501 if (class(xobject) == "XCMSnExp") { | |
502 # Get the legacy xcmsSet object | |
503 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | |
504 if (!is.null(xset@phenoData$sample_group)) | |
505 sampclass(xset) <- xset@phenoData$sample_group | |
506 else | |
507 sampclass(xset) <- "." | |
508 return (xset) | |
509 } | |
510 } |