Mercurial > repos > melpetera > batchcorrection
comparison BC/batch_correction_3Llauncher.R @ 4:23314e1192d4 draft default tip
Uploaded
author | melpetera |
---|---|
date | Thu, 14 Jan 2021 09:56:58 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3:73892ef177e3 | 4:23314e1192d4 |
---|---|
1 ############################################################################################################### | |
2 # batch_correction_3Llauncher # | |
3 # # | |
4 # Authors: Jean-Francois MARTIN / Melanie Petera # | |
5 # Starting date: 04-08-2020 # | |
6 # Based on batch_correction_wrapper.R version 2.91 # | |
7 # Version 1: 02-10-2020 # | |
8 # - split of tool-linked code and script-linked one # | |
9 # - handling of sample tags' parameters # | |
10 # - accepting samples beyond pools and samples # | |
11 # - dealing with special characters in IDs and column names # | |
12 # - adding a min.norm argument to the function # | |
13 # # | |
14 # Input files: dataMatrix.txt, sampleMetadata.txt, variableMetadata.txt (BC only) # | |
15 # Output files: graph.pdf, corrected table (BC only), diagnostic table (DBC only), variableMetadata (BC only) # | |
16 # # | |
17 ############################################################################################################### | |
18 | |
19 meth3L <- function(idsample,iddata,sample_type_col_name,injection_order_col_name,batch_col_name,sample_type_tags, | |
20 factbio,analyse,metaion,detail,method,outlog,span,valnull, | |
21 rdata_output,dataMatrix_out,variableMetadata_out,out_graph_pdf,out_preNormSummary,min.norm){ | |
22 | |
23 ## Import function | |
24 tab.import <- function(tested.file,tabtype){ | |
25 tab.res <- tryCatch(read.table(tested.file,header=TRUE,sep='\t',check.names=FALSE,comment.char = ''), error=conditionMessage) | |
26 if(length(tab.res)==1){ | |
27 stop(paste("Could not import the",tabtype,"file. There may be issues in your table integrity.\nCorresponding R error message:\n",tab.res)) | |
28 }else{ | |
29 tab.comp <- tryCatch(read.table(tested.file,header=TRUE,sep='\t',check.names=FALSE,comment.char = '',quote=""), error=conditionMessage) | |
30 if((length(tab.comp)!=1)&&(dim(tab.res)!=dim(tab.comp))){ # wrong original import due to quotes inside a column name | |
31 return(tab.comp) | |
32 }else{ return(tab.res) } | |
33 } | |
34 } | |
35 | |
36 ## Reading of input files | |
37 idsample=tab.import(idsample,"sampleMetadata") | |
38 iddata=tab.import(iddata,"dataMatrix") | |
39 | |
40 ### Table match check | |
41 table.check <- match2(iddata,idsample,"sample") | |
42 if(length(table.check)>1){check.err(table.check)} | |
43 | |
44 ### StockID | |
45 samp.id <- stockID(iddata,idsample,"sample") | |
46 iddata<-samp.id$dataMatrix ; idsample<-samp.id$Metadata ; samp.id<-samp.id$id.match | |
47 | |
48 ### Checking mandatory variables | |
49 mand.check <- "" | |
50 for(mandcol in c(sample_type_col_name, injection_order_col_name, batch_col_name)){ | |
51 if(!(mandcol%in%colnames(idsample))){ | |
52 mand.check <- c(mand.check,"\nError: no '",mandcol,"' column in sample metadata.\n", | |
53 "Note: column names are case-sensitive.\n") | |
54 } | |
55 } | |
56 if(length(mand.check)>1){ | |
57 mand.check <- c(mand.check,"\nFor more information, see the help section or:", | |
58 "\n http://workflow4metabolomics.org/sites/", | |
59 "workflow4metabolomics.org/files/files/w4e-2016-data_processing.pdf\n") | |
60 check.err(mand.check) | |
61 } | |
62 | |
63 if(analyse == "batch_correction") { | |
64 ## Reading of Metadata Ions file | |
65 metaion=read.table(metaion,header=T,sep='\t',check.names=FALSE,comment.char = '') | |
66 ## Table match check | |
67 table.check <- c(table.check,match2(iddata,metaion,"variable")) | |
68 ## StockID | |
69 var.id <- stockID(iddata,metaion,"variable") | |
70 iddata<-var.id$dataMatrix ; metaion<-var.id$Metadata ; var.id<-var.id$id.match | |
71 } | |
72 | |
73 ### Formating | |
74 idsample[[1]]=make.names(idsample[[1]]) | |
75 dimnames(iddata)[[1]]=iddata[[1]] | |
76 | |
77 ### Transposition of ions data | |
78 idTdata=t(iddata[,2:dim(iddata)[2]]) | |
79 idTdata=data.frame(dimnames(idTdata)[[1]],idTdata) | |
80 | |
81 ### Merge of 2 files (ok even if the two dataframe are not sorted on the same key) | |
82 ids=merge(idsample, idTdata, by.x=1, by.y=1) | |
83 | |
84 ids[[batch_col_name]]=as.factor(ids[[batch_col_name]]) | |
85 nbid=dim(idsample)[2] | |
86 | |
87 ### Checking the number of sample and pool | |
88 | |
89 # least 2 samples | |
90 if(length(which(ids[[sample_type_col_name]] %in% sample_type_tags$sample))<2){ | |
91 table.check <- c(table.check,"\nError: less than 2 samples specified in sample metadata.", | |
92 "\nMake sure this is not due to errors in your ",sample_type_col_name," coding.\n") | |
93 } | |
94 | |
95 # least 2 pools per batch for all batchs | |
96 B <- rep(0,length(levels(ids[[batch_col_name]]))) | |
97 for(nbB in 1:length(levels(ids[[batch_col_name]]))){ | |
98 B[nbB]<-length(which(ids[which(ids[[batch_col_name]]==(levels(ids[[batch_col_name]])[nbB])),,drop=FALSE][[sample_type_col_name]] %in% sample_type_tags$pool)) | |
99 } | |
100 if(length(which(B>1))==0){ | |
101 table.check <- c(table.check,"\nError: less than 2 pools specified in at least one batch in sample metadata.", | |
102 "\nMake sure this is not due to errors in your ",sample_type_col_name," coding.\n") | |
103 } | |
104 | |
105 ### Checking the unicity of samples and variables | |
106 uni.check <- function(tested.tab,tabtype,err.obj){ | |
107 unicity <- duplicated(tested.tab[,1]) | |
108 if(sum(unicity)>0){ | |
109 #Sending back an explicit error | |
110 duptable <- t(t(table(tested.tab[,1][unicity])+1)) | |
111 err.obj <- c(err.obj,paste0("\n-------\nError: your '",tabtype,"' IDs contain duplicates:\n"), | |
112 paste(rownames(duptable),duptable,sep=": ",collapse="\n"), | |
113 "\nSince identifiers are meant to be unique, please check your data.\n-------\n") | |
114 } | |
115 return(err.obj) | |
116 } | |
117 table.check <- uni.check(ids,"sample",table.check) | |
118 if(analyse == "batch_correction"){table.check <- uni.check(metaion,"variable",table.check)} | |
119 | |
120 ## error check | |
121 check.err(table.check) | |
122 | |
123 | |
124 ### BC/DBC-specific processing | |
125 | |
126 # Gathering mandatory information in a single object | |
127 sm.meta <- list(batch=batch_col_name, injectionOrder=injection_order_col_name, sampleType=sample_type_col_name, sampleTag=sample_type_tags) | |
128 | |
129 if(analyse == "batch_correction") { | |
130 ## Launch | |
131 res = norm_QCpool(ids,nbid,outlog,factbio,metaion,detail,FALSE,FALSE,method,span,valnull,sm.meta,min.norm) | |
132 ## Get back original IDs | |
133 var.id <- reproduceID(res[[1]],res[[2]],"variable",var.id) | |
134 res[[1]] <- var.id$dataMatrix ; res[[2]] <- var.id$Metadata | |
135 samp.id <- reproduceID(res[[1]],res[[3]],"sample",samp.id) | |
136 res[[1]] <- samp.id$dataMatrix ; res[[3]] <- samp.id$Metadata | |
137 ## Save files | |
138 save(res, file=rdata_output) | |
139 write.table(res[[1]], file=dataMatrix_out, sep = '\t', row.names=FALSE, quote=FALSE) | |
140 write.table(res[[2]], file=variableMetadata_out, sep = '\t', row.names=FALSE, quote=FALSE) | |
141 }else{ | |
142 ## Launch | |
143 plotsituation(ids,nbid,out_graph_pdf,out_preNormSummary,factbio,span,sm.meta) | |
144 } | |
145 | |
146 }#end of meth3L |