annotate BC/batch_correction_3Llauncher.R @ 4:23314e1192d4 draft default tip

Uploaded
author melpetera
date Thu, 14 Jan 2021 09:56:58 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
23314e1192d4 Uploaded
melpetera
parents:
diff changeset
###############################################################################################################
# batch_correction_3Llauncher #
# #
# Authors: Jean-Francois MARTIN / Melanie Petera #
# Starting date: 04-08-2020 #
# Based on batch_correction_wrapper.R version 2.91 #
# Version 1: 02-10-2020 #
# - split of tool-linked code and script-linked one #
# - handling of sample tags' parameters #
# - accepting samples beyond pools and samples #
# - dealing with special characters in IDs and column names #
# - adding a min.norm argument to the function #
# #
# Input files: dataMatrix.txt, sampleMetadata.txt, variableMetadata.txt (BC only) #
# Output files: graph.pdf, corrected table (BC only), diagnostic table (DBC only), variableMetadata (BC only) #
# #
###############################################################################################################
23314e1192d4 Uploaded
melpetera
parents:
diff changeset
18
23314e1192d4 Uploaded
melpetera
parents:
diff changeset
## meth3L: launcher for the batch correction ("BC") / diagnostic ("DBC") workflow.
##
## Imports the input tables, validates them (table matching, mandatory columns,
## minimum number of samples and pools, ID unicity), then either runs the
## correction (norm_QCpool) and writes its outputs, or runs the diagnostic
## plots (plotsituation).
##
## Arguments:
##   idsample                 path of the tab-separated sampleMetadata file
##   iddata                   path of the tab-separated dataMatrix file
##                            (first column = variable IDs, other columns = samples)
##   sample_type_col_name     name of the sample-type column in sampleMetadata
##   injection_order_col_name name of the injection-order column in sampleMetadata
##   batch_col_name           name of the batch column in sampleMetadata
##   sample_type_tags         list; its $sample and $pool elements hold the tags that
##                            identify samples and pools in the sample-type column
##   analyse                  "batch_correction" runs the correction; any other value
##                            runs the diagnostic (plotsituation)
##   metaion                  path of the tab-separated variableMetadata file
##                            (only read when analyse == "batch_correction")
##   factbio, detail, method, outlog, span, valnull, min.norm
##                            forwarded unchanged to norm_QCpool / plotsituation
##   rdata_output, dataMatrix_out, variableMetadata_out
##                            output paths written in "batch_correction" mode
##   out_graph_pdf, out_preNormSummary
##                            forwarded unchanged to plotsituation
##
## Relies on helpers defined outside this function: match2, stockID, check.err,
## reproduceID, norm_QCpool and plotsituation.
## Called for its side effects (files written, errors raised through check.err/stop).
meth3L <- function(idsample,iddata,sample_type_col_name,injection_order_col_name,batch_col_name,sample_type_tags,
                   factbio,analyse,metaion,detail,method,outlog,span,valnull,
                   rdata_output,dataMatrix_out,variableMetadata_out,out_graph_pdf,out_preNormSummary,min.norm){

  ## Import function
  ## Reads a tab-separated table; on failure stops with a message naming the table type.
  ## The file is read a second time with quoting disabled: if that succeeds and yields
  ## different dimensions, the first import was corrupted by quote characters (e.g. inside
  ## a column name) and the quote-free version is returned instead.
  tab.import <- function(tested.file, tabtype) {
    tab.res <- tryCatch(
      read.table(tested.file, header = TRUE, sep = '\t', check.names = FALSE, comment.char = ''),
      error = conditionMessage
    )
    # On error tryCatch returns the message (a character string). Testing the class rather
    # than length() == 1 keeps valid one-column tables from being mistaken for a failure.
    if (!is.data.frame(tab.res)) {
      stop(paste("Could not import the",tabtype,"file. There may be issues in your table integrity.\nCorresponding R error message:\n",tab.res))
    }
    tab.comp <- tryCatch(
      read.table(tested.file, header = TRUE, sep = '\t', check.names = FALSE, comment.char = '', quote = ""),
      error = conditionMessage
    )
    # identical() gives a single TRUE/FALSE; comparing dim() vectors with != inside && is an
    # error in recent R versions and only ever compared the number of rows in older ones.
    if (is.data.frame(tab.comp) && !identical(dim(tab.res), dim(tab.comp))) {
      return(tab.comp)
    }
    tab.res
  }

  ## Reading of input files
  idsample <- tab.import(idsample, "sampleMetadata")
  iddata <- tab.import(iddata, "dataMatrix")

  ### Table match check
  table.check <- match2(iddata, idsample, "sample")
  if (length(table.check) > 1) {
    check.err(table.check)
  }

  ### StockID
  samp.id <- stockID(iddata, idsample, "sample")
  iddata <- samp.id$dataMatrix
  idsample <- samp.id$Metadata
  samp.id <- samp.id$id.match

  ### Checking mandatory variables
  mand.check <- ""
  for (mandcol in c(sample_type_col_name, injection_order_col_name, batch_col_name)) {
    if (!(mandcol %in% colnames(idsample))) {
      mand.check <- c(mand.check,"\nError: no '",mandcol,"' column in sample metadata.\n",
                      "Note: column names are case-sensitive.\n")
    }
  }
  if (length(mand.check) > 1) {
    mand.check <- c(mand.check,"\nFor more information, see the help section or:",
                    "\n http://workflow4metabolomics.org/sites/",
                    "workflow4metabolomics.org/files/files/w4e-2016-data_processing.pdf\n")
    check.err(mand.check)
  }

  if (analyse == "batch_correction") {
    ## Reading of Metadata Ions file
    metaion <- read.table(metaion, header = TRUE, sep = '\t', check.names = FALSE, comment.char = '')
    ## Table match check
    table.check <- c(table.check, match2(iddata, metaion, "variable"))
    ## StockID
    var.id <- stockID(iddata, metaion, "variable")
    iddata <- var.id$dataMatrix
    metaion <- var.id$Metadata
    var.id <- var.id$id.match
  }

  ### Formating
  idsample[[1]] <- make.names(idsample[[1]])
  dimnames(iddata)[[1]] <- iddata[[1]]

  ### Transposition of ions data
  # drop = FALSE keeps the sample name as a dimname even with a single sample column.
  idTdata <- t(iddata[, -1, drop = FALSE])
  idTdata <- data.frame(dimnames(idTdata)[[1]], idTdata)

  ### Merge of 2 files (ok even if the two dataframe are not sorted on the same key)
  ids <- merge(idsample, idTdata, by.x = 1, by.y = 1)

  ids[[batch_col_name]] <- as.factor(ids[[batch_col_name]])
  nbid <- dim(idsample)[2]

  ### Checking the number of sample and pool

  # at least 2 samples
  if (sum(ids[[sample_type_col_name]] %in% sample_type_tags$sample) < 2) {
    table.check <- c(table.check,"\nError: less than 2 samples specified in sample metadata.",
                     "\nMake sure this is not due to errors in your ",sample_type_col_name," coding.\n")
  }

  # at least 2 pools per batch, for every batch
  is.pool <- ids[[sample_type_col_name]] %in% sample_type_tags$pool
  B <- vapply(
    levels(ids[[batch_col_name]]),
    function(batch.level) sum(is.pool[which(ids[[batch_col_name]] == batch.level)]),
    integer(1)
  )
  # The error must be raised as soon as ONE batch lacks pools (as the message states),
  # not only when every batch does.
  if (any(B < 2)) {
    table.check <- c(table.check,"\nError: less than 2 pools specified in at least one batch in sample metadata.",
                     "\nMake sure this is not due to errors in your ",sample_type_col_name," coding.\n")
  }

  ### Checking the unicity of samples and variables
  ## Appends an explicit error message to err.obj when the first column of tested.tab
  ## contains duplicated IDs (listing each duplicated ID with its number of occurrences),
  ## and returns the (possibly extended) err.obj.
  uni.check <- function(tested.tab, tabtype, err.obj) {
    all.ids <- tested.tab[, 1]
    unicity <- duplicated(all.ids)
    if (sum(unicity) > 0) {
      #Sending back an explicit error
      # as.character() prevents unused factor levels from being counted by table();
      # +1 because duplicated() does not flag the first occurrence of each ID.
      dup.count <- table(as.character(all.ids[unicity])) + 1
      err.obj <- c(err.obj,paste0("\n-------\nError: your '",tabtype,"' IDs contain duplicates:\n"),
                   paste(names(dup.count), as.vector(dup.count), sep = ": ", collapse = "\n"),
                   "\nSince identifiers are meant to be unique, please check your data.\n-------\n")
    }
    return(err.obj)
  }
  table.check <- uni.check(ids, "sample", table.check)
  if (analyse == "batch_correction") {
    table.check <- uni.check(metaion, "variable", table.check)
  }

  ## error check
  check.err(table.check)


  ### BC/DBC-specific processing

  # Gathering mandatory information in a single object
  sm.meta <- list(batch=batch_col_name, injectionOrder=injection_order_col_name, sampleType=sample_type_col_name, sampleTag=sample_type_tags)

  if (analyse == "batch_correction") {
    ## Launch
    res <- norm_QCpool(ids,nbid,outlog,factbio,metaion,detail,FALSE,FALSE,method,span,valnull,sm.meta,min.norm)
    ## Get back original IDs
    # res[[1]] is written out as the data matrix and res[[2]] as the variable metadata below;
    # res[[3]] is paired with the sample IDs here.
    var.id <- reproduceID(res[[1]], res[[2]], "variable", var.id)
    res[[1]] <- var.id$dataMatrix
    res[[2]] <- var.id$Metadata
    samp.id <- reproduceID(res[[1]], res[[3]], "sample", samp.id)
    res[[1]] <- samp.id$dataMatrix
    res[[3]] <- samp.id$Metadata
    ## Save files
    save(res, file = rdata_output)
    write.table(res[[1]], file = dataMatrix_out, sep = '\t', row.names = FALSE, quote = FALSE)
    write.table(res[[2]], file = variableMetadata_out, sep = '\t', row.names = FALSE, quote = FALSE)
  } else {
    ## Launch
    plotsituation(ids,nbid,out_graph_pdf,out_preNormSummary,factbio,span,sm.meta)
  }

}#end of meth3L