comparison AskoR.R @ 0:ceef9bc6bbc7 draft

planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/askor commit 08a187f91ba050d584e586d2fcc99d984dac607c
author genouest
date Thu, 12 Apr 2018 05:23:45 -0400
parents
children 877d2be25a6a
comparison
equal deleted inserted replaced
-1:000000000000 0:ceef9bc6bbc7
1 asko3c <- function(data_list){
2 asko<-list()
3
4 ######### Condition ############
5
6 condition<-unique(data_list$samples$condition) # retrieval of different condition's names
7 col1<-which(colnames(data_list$samples)=="condition") # determination of number of the column "condition"
8 colcol<-which(colnames(data_list$samples)=="color")
9 if(is.null(parameters$fileofcount)){
10 col2<-which(colnames(data_list$samples)=="file") # determination of number of the column "replicate"
11 column_name<-colnames(data_list$samples[,c(-col1,-col2,-colcol)]) # retrieval of column names needful to create the file condition
12 }else{column_name<-colnames(data_list$samples[,c(-col1,-colcol)])}
13 condition_asko<-data.frame(row.names=condition) # initialization of the condition's data frame
14 #level<-list() # initialization of the list will contain the level
15 # of each experimental factor
16 for (name in column_name){ # for each experimental factor :
17 # if(str_detect(name, "condition")){ # for the column of conditions, the level is fixed to 0 because
18 # level<-append(level, 0) # "condition" must be the first column of the data frame
19 # }else{ #
20 # level<-append(level, length(levels(data_list$samples[,name]))) # adding to the list the level of other experimental factors
21 # }
22 #
23 condition_asko$n<-NA # initialization of new column in the condition's data frame
24 colnames(condition_asko)[colnames(condition_asko)=="n"]<-name # to rename the new column with with the name of experimental factor
25 for(condition_name in condition){ # for each condition's names
26 condition_asko[condition_name,name]<-as.character(unique(data_list$samples[data_list$samples$condition==condition_name, name]))
27 } # filling the condition's data frame
28 }
29 # order_level<-order(unlist(level)) # list to vector
30 # condition_asko<-condition_asko[,order_level] # order columns according to their level
31 #asko$condition<-condition_asko # adding data frame of conditions to asko object
32
33 #print(condition_asko)
34
35
36 #############contrast + context##################
37 i=0
38
39 contrast_asko<-data.frame(row.names = colnames(data_list$contrast)) # initialization of the contrast's data frame
40 contrast_asko$Contrast<-NA # all columns are created et initialized with
41 contrast_asko$context1<-NA # NA values
42 contrast_asko$context2<-NA #
43
44 list_context<-list() # initialization of context and condition lists
45 list_condition<-list() # will be used to create the context data frame
46 if(parameters$mk_context==TRUE){
47 for (contrast in colnames(data_list$contrast)){ # for each contrast :
48 i=i+1 # contrast data frame will be filled line by line
49 #print(contrast)
50 set_cond1<-row.names(data_list$contrast)[data_list$contrast[,contrast]>0] # retrieval of 1st set of condition's names implicated in a given contrast
51 set_cond2<-row.names(data_list$contrast)[data_list$contrast[,contrast]<0] # retrieval of 2nd set of condition's names implicated in a given contrast
52 parameters<-colnames(condition_asko) # retrieval of names of experimental factor
53 print(paste("set_cond1 : ", set_cond1, sep = ""))
54 # print(length(set_cond1))
55 print(paste("set_cond2 : ", set_cond2, sep = ""))
56 # print(length(set_cond2))
57 if(length(set_cond1)==1){complex1=F}else{complex1=T}# to determine if we have complex contrast (multiple conditions
58 if(length(set_cond2)==1){complex2=F}else{complex2=T}# compared to multiple conditions) or not
59 #print(complex1)
60 if(complex1==F && complex2==F){ # Case 1: one condition against one condition
61 contrast_asko[i,"context1"]<-set_cond1 # filling contrast data frame with the name of the 1st context
62 contrast_asko[i,"context2"]<-set_cond2 # filling contrast data frame with the name of the 2nd context
63 contrast_name<-paste(set_cond1,set_cond2, sep = "vs") # creation of contrast name by associating the names of contexts
64 contrast_asko[i,"Contrast"]<-contrast_name # filling contrast data frame with contrast name
65 list_context<-append(list_context, set_cond1) #
66 list_condition<-append(list_condition, set_cond1) # adding respectively to the lists "context" and "condition" the context name
67 list_context<-append(list_context, set_cond2) # and the condition name associated
68 list_condition<-append(list_condition, set_cond2) #
69 }
70 if(complex1==F && complex2==T){ # Case 2: one condition against multiple condition
71 contrast_asko[i,"context1"]<-set_cond1 # filling contrast data frame with the name of the 1st context
72 list_context<-append(list_context, set_cond1) # adding respectively to the lists "context" and "condition" the 1st context
73 list_condition<-append(list_condition, set_cond1) # name and the condition name associated
74 l=0
75 # "common_factor" will contain the common experimental factors shared by
76 common_factor=list() # conditions belonging to the complex context
77 for (param_names in parameters){ # for each experimental factor
78 facteur<-unique(c(condition_asko[,param_names])) # retrieval of possible values for the experimental factor
79 l=l+1 #
80 for(value in facteur){ # for each possible values
81 verif<-unique(str_detect(set_cond2, value)) # verification of the presence of values in each condition contained in the set
82 if(length(verif)==1 && verif==TRUE){common_factor[l]<-value} # if verif contains only TRUE, value of experimental factor
83 } # is added as common factor
84 }
85 if(length(common_factor)>1){ # if there are several common factor
86 common_factor<-toString(common_factor) # the list is converted to string
87 contx<-str_replace(common_factor,", ","")
88 contx<-str_replace_all(contx, "NULL", "")}else{contx<-common_factor} # and all common factor are concatenated to become the name of context
89 contrast_asko[i,"context2"]<-contx # filling contrast data frame with the name of the 2nd context
90 contrast_name<-paste(set_cond1,contx, sep = "vs") # concatenation of context names to make the contrast name
91 contrast_asko[i,"Contrast"]<-contrast_name # filling contrast data frame with the contrast name
92 for(j in 1:length(set_cond2)){ # for each condition contained in the complex context (2nd):
93 list_context<-append(list_context, contx) # adding condition name with the context name associated
94 list_condition<-append(list_condition, set_cond2[j]) # to their respective list
95 }
96 }
97 if(complex1==T && complex2==F){ # Case 3: multiple conditions against one condition
98 contrast_asko[i,"context2"]<-set_cond2 # filling contrast data frame with the name of the 2nd context
99 list_context<-append(list_context, set_cond2) # adding respectively to the lists "context" and "condition" the 2nd context
100 list_condition<-append(list_condition, set_cond2) # name and the 2nd condition name associated
101 l=0
102 # "common_factor" will contain the common experimental factors shared by
103 common_factor=list() # conditions belonging to the complex context
104 for (param_names in parameters){ # for each experimental factor:
105 facteur<-unique(c(condition_asko[,param_names])) # retrieval of possible values for the experimental factor
106 l=l+1
107 for(value in facteur){ # for each possible values:
108 verif<-unique(str_detect(set_cond1, value)) # verification of the presence of values in each condition contained in the set
109 if(length(verif)==1 && verif==TRUE){common_factor[l]<-value} # if verif contains only TRUE, value of experimental factor
110 } # is added as common factor
111 }
112 if(length(common_factor)>1){ # if there are several common factor
113 common_factor<-toString(common_factor) # the list is converted to string
114 contx<-str_replace(common_factor,", ","")
115 contx<-str_replace_all(contx, "NULL", "")}else{contx<-common_factor} # and all common factor are concatenated to become the name of context
116 contrast_asko[i,"context1"]<-contx # filling contrast data frame with the name of the 1st context
117 contrast_name<-paste(contx,set_cond2, sep = "vs") # concatenation of context names to make the contrast name
118 contrast_asko[i,"Contrast"]<-contrast_name # filling contrast data frame with the contrast name
119 for(j in 1:length(set_cond1)){ # for each condition contained in the complex context (1st):
120 list_context<-append(list_context, contx) # adding condition name with the context name associated
121 list_condition<-append(list_condition, set_cond1[j]) # to their respective list
122 }
123 }
124 if(complex1==T && complex2==T){ # Case 4: multiple conditions against multiple conditions
125 m=0 #
126 n=0 #
127 common_factor1=list() # list of common experimental factors shared by conditions of the 1st context
128 common_factor2=list() # list of common experimental factors shared by conditions of the 2nd context
129 w=1
130 for (param_names in parameters){ # for each experimental factor:
131 print(w)
132 w=w+1
133 facteur<-unique(c(condition_asko[,param_names])) # retrieval of possible values for the experimental factor
134 print(paste("facteur : ", facteur, sep=""))
135 for(value in facteur){ # for each possible values:
136 print(value)
137 #print(class(value))
138 #print(set_cond1)
139 verif1<-unique(str_detect(set_cond1, value)) # verification of the presence of values in each condition
140 # contained in the 1st context
141 verif2<-unique(str_detect(set_cond2, value)) # verification of the presence of values in each condition
142 # contained in the 2nd context
143
144 if(length(verif1)==1 && verif1==TRUE){m=m+1;common_factor1[m]<-value} # if verif=only TRUE, value of experimental factor is added as common factor
145 if(length(verif2)==1 && verif2==TRUE){n=n+1;common_factor2[n]<-value} # if verif=only TRUE, value of experimental factor is added as common factor
146 }
147 }
148 print(paste("common_factor1 : ",common_factor1,sep=""))
149 print(paste("common_factor2 : ",common_factor2,sep=""))
150
151 if(length(common_factor1)>1){ # if there are several common factor for conditions in the 1st context
152 common_factor1<-toString(common_factor1) # conversion list to string
153 contx1<-str_replace(common_factor1,", ","")}else{contx1<-common_factor1}# all common factor are concatenated to become the name of context
154 contx1<-str_replace_all(contx1, "NULL", "")
155 print(paste("contx1 : ", contx1, sep=""))
156 if(length(common_factor2)>1){ # if there are several common factor for conditions in the 2nd context
157 common_factor2<-toString(common_factor2) # conversion list to string
158 contx2<-str_replace(common_factor2,", ","")}else{contx2<-common_factor2}# all common factor are concatenated to become the name of context
159 contx2<-str_replace_all(contx2, "NULL", "")
160 print(paste("contx2 : ", contx2, sep=""))
161 contrast_asko[i,"context1"]<-contx1 # filling contrast data frame with the name of the 1st context
162 contrast_asko[i,"context2"]<-contx2 # filling contrast data frame with the name of the 2nd context
163 contrast_asko[i,"Contrast"]<-paste(contx1,contx2, sep = "vs") # filling contrast data frame with the name of the contrast
164 for(j in 1:length(set_cond1)){ # for each condition contained in the complex context (1st):
165 list_context<-append(list_context, contx1) # verification of the presence of values in each condition
166 list_condition<-append(list_condition, set_cond1[j]) # contained in the 1st context
167 }
168 for(j in 1:length(set_cond2)){ # for each condition contained in the complex context (2nd):
169 list_context<-append(list_context, contx2) # verification of the presence of values in each condition
170 list_condition<-append(list_condition, set_cond2[j]) # contained in the 1st context
171 }
172 }
173 }
174 }
175 else{
176 for (contrast in colnames(data_list$contrast)){
177 i=i+1
178 contexts=strsplit2(contrast,"vs")
179 contrast_asko[i,"Contrast"]<-contrast
180 contrast_asko[i,"context1"]=contexts[1]
181 contrast_asko[i,"context2"]=contexts[2]
182 set_cond1<-row.names(data_list$contrast)[data_list$contrast[,contrast]>0]
183 set_cond2<-row.names(data_list$contrast)[data_list$contrast[,contrast]<0]
184 for (cond1 in set_cond1){
185 # print(contexts[1])
186 # print(cond1)
187 list_context<-append(list_context, contexts[1])
188 list_condition<-append(list_condition, cond1)
189 }
190 for (cond2 in set_cond2){
191 list_context<-append(list_context, contexts[2])
192 list_condition<-append(list_condition, cond2)
193 }
194 }
195 }
196
197 list_context<-unlist(list_context) # conversion list to vector
198 list_condition<-unlist(list_condition) # conversion list to vector
199 # print(list_condition)
200 # print(list_context)
201 context_asko<-data.frame(list_context,list_condition) # creation of the context data frame
202 context_asko<-unique(context_asko)
203 colnames(context_asko)[colnames(context_asko)=="list_context"]<-"context" # header formatting for askomics
204 colnames(context_asko)[colnames(context_asko)=="list_condition"]<-"condition" # header formatting for askomics
205 #asko$contrast<-contrast_asko # adding context data frame to asko object
206 #asko$context<-context_asko # adding context data frame to asko object
207 asko<-list("condition"=condition_asko, "contrast"=contrast_asko, "context"=context_asko)
208 colnames(context_asko)[colnames(context_asko)=="context"]<-"Context" # header formatting for askomics
209 colnames(context_asko)[colnames(context_asko)=="condition"]<-"has@Condition" # header formatting for askomics
210 colnames(contrast_asko)[colnames(contrast_asko)=="context1"]<-paste("context1_of", "Context", sep="@") # header formatting for askomics
211 colnames(contrast_asko)[colnames(contrast_asko)=="context2"]<-paste("context2_of", "Context", sep="@") # header formatting for askomics
212
213 ######## Files creation ########
214
215 write.table(data.frame("Condition"=row.names(condition_asko),condition_asko), paste0(parameters$out_dir,"/condition.asko.txt"), sep = parameters$sep, row.names = F, quote=F) # creation of condition file for asko
216 write.table(context_asko, paste0(parameters$out_dir,"/context.asko.txt"), sep=parameters$sep, col.names = T, row.names = F,quote=F) # creation of context file for asko
217 write.table(contrast_asko, paste0(parameters$out_dir,"/contrast.asko.txt"), sep=parameters$sep, col.names = T, row.names = F, quote=F) # creation of contrast file for asko
218 return(asko)
219 }
220
221 .NormCountsMean <- function(glmfit, ASKOlist, context){
222
223 lib_size_norm<-glmfit$samples$lib.size*glmfit$samples$norm.factors # normalization computation of all library sizes
224 set_condi<-ASKOlist$context$condition[ASKOlist$context$context==context] # retrieval of condition names associated to context
225
226 for (condition in set_condi){
227 sample_name<-rownames(glmfit$samples[glmfit$samples$condition==condition,]) # retrieval of the replicate names associated to conditions
228 subset_counts<-data.frame(row.names = row.names(glmfit$counts)) # initialization of data frame as subset of counts table
229 for(name in sample_name){
230 lib_sample_norm<-glmfit$samples[name,"lib.size"]*glmfit$samples[name,"norm.factors"] # normalization computation of sample library size
231 subset_counts$c<-glmfit$counts[,name] # addition in subset of sample counts column
232 subset_counts$c<-subset_counts$c*mean(lib_size_norm)/lib_sample_norm # normalization computation of sample counts
233 colnames(subset_counts)[colnames(subset_counts)=="c"]<-name # to rename the column with the condition name
234 }
235 mean_counts<-rowSums(subset_counts)/ncol(subset_counts) # computation of the mean
236 ASKOlist$stat.table$mean<-mean_counts # subset integration in the glm_result table
237 colnames(ASKOlist$stat.table)[colnames(ASKOlist$stat.table)=="mean"]<-paste(context,condition,sep = "/")
238 } # to rename the column with the context name
239 return(ASKOlist$stat.table) # return the glm object
240 }
241
242 AskoStats <- function (glm_test, fit, contrast, ASKOlist, dge,parameters){
243 contrasko<-ASKOlist$contrast$Contrast[row.names(ASKOlist$contrast)==contrast] # to retrieve the name of contrast from Asko object
244 contx1<-ASKOlist$contrast$context1[row.names(ASKOlist$contrast)==contrast] # to retrieve the name of 1st context from Asko object
245 contx2<-ASKOlist$contrast$context2[row.names(ASKOlist$contrast)==contrast] # to retrieve the name of 2nd context from Asko object
246
247 ASKO_stat<-glm_test$table
248 ASKO_stat$Test_id<-paste(contrasko, rownames(ASKO_stat), sep = "_") # addition of Test_id column = unique ID
249 ASKO_stat$contrast<-contrasko # addition of the contrast of the test
250 ASKO_stat$gene <- row.names(ASKO_stat) # addition of gene column = gene ID
251 ASKO_stat$FDR<-p.adjust(ASKO_stat$PValue, method=parameters$p_adj_method) # computation of False Discovery Rate
252
253 ASKO_stat$Significance=0 # Between context1 and context2 :
254 ASKO_stat$Significance[ASKO_stat$logFC< 0 & ASKO_stat$FDR<=parameters$threshold_FDR] = -1 # Significance values = -1 for down regulated genes
255 ASKO_stat$Significance[ASKO_stat$logFC> 0 & ASKO_stat$FDR<=parameters$threshold_FDR] = 1 # Significance values = 1 for up regulated genes
256
257 if(parameters$Expression==TRUE){
258 ASKO_stat$Expression=NA # addition of column "expression"
259 ASKO_stat$Expression[ASKO_stat$Significance==-1]<-paste(contx1, contx2, sep="<") # the value of attribute "Expression" is a string
260 ASKO_stat$Expression[ASKO_stat$Significance==1]<-paste(contx1, contx2, sep=">") # this attribute is easier to read the Significance
261 ASKO_stat$Expression[ASKO_stat$Significance==0]<-paste(contx1, contx2, sep="=") # of expression between two contexts
262 }
263 if(parameters$logFC==T){cola="logFC"}else{cola=NULL} #
264 if(parameters$FC==T){colb="FC";ASKO_stat$FC <- 2^abs(ASKO_stat$logFC)}else{colb=NULL} # computation of Fold Change from log2FC
265 if(parameters$Sign==T){colc="Significance"} #
266 if(parameters$logCPM==T){cold="logCPM"}else{cold=NULL} #
267 if(parameters$LR==T){cole="LR"}else{cole=NULL} #
268 if(parameters$FDR==T){colf="FDR"}else{colf=NULL}
269
270 ASKOlist$stat.table<-ASKO_stat[,c("Test_id","contrast","gene",cola,colb,"PValue", # adding table "stat.table" to the ASKOlist
271 "Expression",colc,cold,cole,colf)]
272 if(parameters$mean_counts==T){ # computation of the mean of normalized counts for conditions
273 ASKOlist$stat.table<-.NormCountsMean(fit, ASKOlist, contx1) # in the 1st context
274 ASKOlist$stat.table<-.NormCountsMean(fit, ASKOlist, contx2) # in the 2nd context
275 }
276 print(table(ASKO_stat$Expression))
277 colnames(ASKOlist$stat.table)[colnames(ASKOlist$stat.table)=="gene"] <- paste("is", "gene", sep="@") # header formatting for askomics
278 colnames(ASKOlist$stat.table)[colnames(ASKOlist$stat.table)=="contrast"] <- paste("measured_in", "Contrast", sep="@") # header formatting for askomics
279 o <- order(ASKOlist$stat.table$FDR) # ordering genes by FDR value
280 ASKOlist$stat.table<-ASKOlist$stat.table[o,]
281 #
282 dir.create(parameters$out_dir)
283 write.table(ASKOlist$stat.table,paste0(parameters$out_dir,"/", parameters$organism, contrasko, ".txt"), #
284 sep=parameters$sep, col.names = T, row.names = F, quote=FALSE)
285
286 if(parameters$heatmap==TRUE){
287 cpm_gstats<-cpm(dge, log=TRUE)[o,][1:parameters$numhigh,]
288 heatmap.2(cpm_gstats, cexRow=0.5, cexCol=0.8, scale="row", labCol=dge$samples$Name, xlab=contrast, Rowv = FALSE, dendrogram="col")
289 }
290
291 return(ASKOlist)
292
293 }
294
295 loadData <- function(parameters){
296
297 #####samples#####
298 samples<-read.table(parameters$sample_file, header=TRUE, sep="\t", row.names=1, comment.char = "#") #prise en compte des r?sultats de T2
299 if(is.null(parameters$select_sample)==FALSE){
300 if(parameters$regex==TRUE){
301 selected<-c()
302 for(sel in parameters$select_sample){
303 select<-grep(sel, rownames(samples))
304 if(is.null(selected)){selected=select}else{selected<-append(selected, select)}
305 }
306 samples<-samples[selected,]
307 }else{samples<-samples[parameters$select_sample,]}
308 }
309
310 if(is.null(parameters$rm_sample)==FALSE){
311 if(parameters$regex==TRUE){
312 for(rm in parameters$rm_sample){
313 removed<-grep(rm, rownames(samples))
314 # print(removed)
315 if(length(removed!=0)){samples<-samples[-removed,]}
316 }
317 }else{
318 for (rm in parameters$rm_sample) {
319 rm2<-match(rm, rownames(samples))
320 samples<-samples[-rm2,]
321 }
322 }
323 }
324 condition<-unique(samples$condition)
325 #print(condition)
326 color<-brewer.pal(length(condition), parameters$palette)
327 #print(color)
328 samples$color<-NA
329 j=0
330 for(name in condition){
331 j=j+1
332 samples$color[samples$condition==name]<-color[j]
333 }
334 #print(samples)
335
336
337 #####counts#####
338 if(is.null(parameters$fileofcount)){
339 dge<-readDGE(samples$file, labels=rownames(samples), columns=c(parameters$col_genes,parameters$col_counts), header=TRUE, comment.char="#")
340 dge<-DGEList(counts=dge$counts, samples=samples)
341 # dge$samples=samples
342 #countT<-dge$counts
343 # if(is.null(parameters$select_sample)==FALSE){
344 # slct<-grep(parameters$select_sample, colnames(countT))
345 # dge$counts<-dge$counts[,slct]
346 # dge$samples<-dge$samples[,slct]
347 # }
348 # if(is.null(parameters$rm_sample)==FALSE){
349 # rmc<-grep(parameters$rm_count, colnames(dge$counts))
350 # dge$counts<-dge$counts[,-rmc]
351 # print(ncol(dge$counts))
352 # rms<-grep(parameters$rm_sample, row.names(dge$samples))
353 # dge$samples<-dge$samples[-rms,]
354 # }
355 }else {
356 if(grepl(".csv", parameters$fileofcount)==TRUE){
357 count<-read.csv(parameters$fileofcount, header=TRUE, sep = "\t", row.names = parameters$col_genes)
358 }
359 else{
360 count<-read.table(parameters$fileofcount, header=TRUE, sep = "\t", row.names = parameters$col_genes, comment.char = "")
361 }
362 select_counts<-row.names(samples)
363 #countT<-count[,c(parameters$col_counts:length(colnames(count)))]
364 countT<-count[,select_counts]
365 dge<-DGEList(counts=countT, samples=samples)
366 # if(is.null(parameters$select_sample)==FALSE){
367 # slct<-grep(parameters$select_sample, colnames(countT))
368 # countT<-countT[,slct]
369 # }
370 # if(is.null(parameters$rm_count)==FALSE){
371 # rms<-grep(parameters$rm_count, colnames(countT))
372 # #print(rms)
373 # countT<-countT[,-rms]
374 #
375 # }
376 #print(nrow(samples))
377 #print(ncol(countT))
378 }
379
380 #####design#####
381 Group<-factor(samples$condition)
382 designExp<-model.matrix(~0+Group)
383 rownames(designExp) <- row.names(samples)
384 colnames(designExp) <- levels(Group)
385
386 #####contrast#####
387 contrastab<-read.table(parameters$contrast_file, sep="\t", header=TRUE, row.names = 1, comment.char="#", stringsAsFactors = FALSE)
388
389 rmcol<-list()
390 for(condition_name in row.names(contrastab)){
391 test<-match(condition_name, colnames(designExp),nomatch = 0)
392 if(test==0){
393 print(condition_name)
394 rm<-grep("0", contrastab[condition_name,], invert = T)
395 print(rm)
396 if(is.null(rmcol)){rmcol=rm}else{rmcol<-append(rmcol, rm)}
397 }
398 }
399 if (length(rmcol)> 0){
400 rmcol<-unlist(rmcol)
401 rmcol<-unique(rmcol)
402 rmcol=-rmcol
403 contrastab<-contrastab[,rmcol]
404 }
405
406 ord<-match(colnames(designExp),row.names(contrastab), nomatch = 0)
407 contrast_table<-contrastab[ord,]
408 colnum<-c()
409
410 for(contrast in colnames(contrast_table)){
411 set_cond1<-row.names(contrast_table)[contrast_table[,contrast]=="+"]
412 #print(set_cond1)
413 set_cond2<-row.names(contrast_table)[contrast_table[,contrast]=="-"]
414 #print(set_cond2)
415 if(length(set_cond1)!=length(set_cond2)){
416 contrast_table[,contrast][contrast_table[,contrast]=="+"]=signif(1/length(set_cond1),digits = 2)
417 contrast_table[,contrast][contrast_table[,contrast]=="-"]=signif(-1/length(set_cond2),digits = 2)
418 }
419 else {
420 contrast_table[,contrast][contrast_table[,contrast]=="+"]=1
421 contrast_table[,contrast][contrast_table[,contrast]=="-"]=-1
422 }
423 contrast_table[,contrast]<-as.numeric(contrast_table[,contrast])
424 }
425
426 #####annotation#####
427 #annotation <- read.csv(parameters$annotation_file, header = T, sep = '\t', quote = "", row.names = 1)
428
429 #data<-list("counts"=countT, "samples"=samples, "contrast"=contrast_table, "annot"=annotation, "design"=designExp)
430 #print(countT)
431 rownames(dge$samples)<-rownames(samples) # replace the renaming by files
432 data<-list("dge"=dge, "samples"=samples, "contrast"=contrast_table, "design"=designExp)
433 return(data)
434 }
435
436 GEfilt <- function(dge_list, parameters){
437 cpm<-cpm(dge_list)
438 logcpm<-cpm(dge_list, log=TRUE)
439 colnames(logcpm)<-rownames(dge_list$samples)
440 nsamples <- ncol(dge_list) # cr?ation nouveau plot
441 plot(density(logcpm[,1]),
442 col=as.character(dge_list$samples$color[1]), # plot exprimant la densit? de chaque g?ne
443 lwd=1,
444 ylim=c(0,0.21),
445 las=2,
446 main="A. Raw data",
447 xlab="Log-cpm") # en fonction de leurs valeurs d'expression
448 abline(v=0, lty=3)
449 for (i in 2:nsamples){ # on boucle sur chaque condition restante
450 den<-density(logcpm[,i]) # et les courbes sont rajout?es dans le plot
451 lines(den$x, col=as.character(dge_list$samples$color[i]), den$y, lwd=1) #
452 }
453 legend("topright", rownames(dge_list$samples),
454 text.col=as.character(dge_list$samples$color),
455 bty="n",
456 text.width=6,
457 cex=0.5)
458 # rowSums compte le nombre de score (cases) pour chaque colonne Sup ? 0.5
459 keep.exprs <- rowSums(cpm>parameters$threshold_cpm)>=parameters$replicate_cpm # en ajoutant >=3 cela donne un test conditionnel
460 filtered_counts <- dge_list[keep.exprs,,keep.lib.sizes=F] # si le comptage respecte la condition alors renvoie TRUE
461 filtered_cpm<-cpm(filtered_counts$counts, log=TRUE)
462
463 plot(density(filtered_cpm[,1]),
464 col=as.character(dge_list$samples$color[1]),
465 lwd=2,
466 ylim=c(0,0.21),
467 las=2,
468 main="B. Filtered data", xlab="Log-cpm")
469 abline(v=0, lty=3)
470 for (i in 2:nsamples){
471 den <- density(filtered_cpm[,i])
472 lines(den$x,col=as.character(dge_list$samples$color[i]), den$y, lwd=1)
473 }
474 legend("topright", rownames(dge_list$samples),
475 text.col=as.character(dge_list$samples$col),
476 bty="n",
477 text.width=6,
478 cex=0.5)
479 return(filtered_counts)
480 }
481
482 GEnorm <- function(filtered_GE, parameters){
483 filtered_cpm <- cpm(filtered_GE, log=TRUE) #nouveau calcul Cpm sur donn?es filtr?es, si log=true alors valeurs cpm en log2
484 colnames(filtered_cpm)<-rownames(filtered_GE$samples)
485 boxplot(filtered_cpm,
486 col=filtered_GE$samples$color, #boxplot des scores cpm non normalis?s
487 main="A. Before normalization",
488 cex.axis=0.5,
489 las=2,
490 ylab="Log-cpm")
491
492 norm_GE<-calcNormFactors(filtered_GE, method = parameters$normal_method) # normalisation de nos comptages par le methode TMM, estimation du taux de production d'un ARN # en estimant l'?chelle des facteurs entre echantillons -> but : pouvoir comparer nos ech entre eux
493
494 logcpm_norm <- cpm(norm_GE, log=TRUE)
495 colnames(logcpm_norm)<-rownames(filtered_GE$samples)
496 boxplot(logcpm_norm,
497 col=filtered_GE$samples$color,
498 main="B. After normalization",
499 cex.axis=0.5,
500 las=2,
501 ylab="Log-cpm")
502
503 return(norm_GE)
504 }
505
506 GEcorr <- function(dge, parameters){
507 lcpm<-cpm(dge, log=TRUE)
508 colnames(lcpm)<-rownames(dge$samples)
509 cormat<-cor(lcpm)
510 # color<- colorRampPalette(c("yellow", "white", "green"))(20)
511 color<-colorRampPalette(c("black","red","yellow","white"),space="rgb")(28)
512 heatmap(cormat, col=color, symm=TRUE,RowSideColors =as.character(dge$samples$color), ColSideColors = as.character(dge$samples$color))
513 #MDS
514 mds <- cmdscale(dist(t(lcpm)),k=3, eig=TRUE)
515 eigs<-round((mds$eig)*100/sum(mds$eig[mds$eig>0]),2)
516
517 mds1<-ggplot(as.data.frame(mds$points), aes(V1, V2, label = rownames(mds$points))) + labs(title="MDS Axes 1 and 2") + geom_point(color =as.character(dge$samples$color) ) + xlab(paste('dim 1 [', eigs[1], '%]')) +ylab(paste('dim 2 [', eigs[2], "%]")) + geom_label_repel(aes(label = rownames(mds$points)), color = 'black',size = 3.5)
518 print(mds1)
519 #ggsave("mds_corr1-2.tiff")
520 #ggtitle("MDS Axes 2 and 3")
521 mds2<-ggplot(as.data.frame(mds$points), aes(V2, V3, label = rownames(mds$points))) + labs(title="MDS Axes 2 and 3") + geom_point(color =as.character(dge$samples$color) ) + xlab(paste('dim 2 [', eigs[2], '%]')) +ylab(paste('dim 3 [', eigs[3], "%]")) + geom_label_repel(aes(label = rownames(mds$points)), color = 'black',size = 3.5)
522 print(mds2)
523 # ggtitle("MDS Axes 1 and 3")
524 #ggsave("mds_corr2-3.tiff")
525 mds3<-ggplot(as.data.frame(mds$points), aes(V1, V3, label = rownames(mds$points))) + labs(title="MDS Axes 1 and 3") + geom_point(color =as.character(dge$samples$color) ) + xlab(paste('dim 1 [', eigs[1], '%]')) +ylab(paste('dim 3 [', eigs[3], "%]")) + geom_label_repel(aes(label = rownames(mds$points)), color = 'black',size = 3.5)
526 print(mds3)
527 #ggsave("mds_corr1-3.tiff")
528 }
529
530 DEanalysis <- function(norm_GE, data_list, asko_list, parameters){
531
532 normGEdisp <- estimateDisp(norm_GE, data_list$design)
533 if(parameters$glm=="lrt"){
534 fit <- glmFit(normGEdisp, data_list$design, robust = T)
535 }
536 if(parameters$glm=="qlf"){
537 fit <- glmQLFit(normGEdisp, data_list$design, robust = T)
538 plotQLDisp(fit)
539 }
540
541 #plotMD.DGEGLM(fit)
542 #plotBCV(norm_GE)
543
544 #sum<-norm_GE$genes
545 for (contrast in colnames(data_list$contrast)){
546 print(asko_list$contrast$Contrast[contrast])
547 if(parameters$glm=="lrt"){
548 glm_test<-glmLRT(fit, contrast=data_list$contrast[,contrast])
549 }
550 if(parameters$glm=="qlf"){
551 glm_test<-glmQLFTest(fit, contrast=data_list$contrast[,contrast])
552 }
553
554 #sum[,contrast]<-decideTestsDGE(glm, adjust.method = parameters$p_adj_method, lfc=1)
555 #print(table(sum[,contrast]))
556 AskoStats(glm_test, fit, contrast, asko_list,normGEdisp,parameters)
557 }
558 }
559
560 Asko_start <-function(){
561 library(limma)
562 library(statmod)
563 library(edgeR)
564 library(ggplot2)
565 library(RColorBrewer)
566 library(ggrepel)
567 library(gplots)
568 library(stringr)
569 library(optparse)
570 option_list = list(
571 make_option(c("-o", "--out"), type="character", default="out.pdf",dest="output_pdf",
572 help="output file name [default= %default]", metavar="character"),
573 make_option(c("-d", "--dir"), type="character", default=".",dest="dir_path",
574 help="data directory path [default= %default]", metavar="character"),
575 make_option("--outdir", type="character", default=".",dest="out_dir",
576 help="outputs directory [default= %default]", metavar="character"),
577 make_option(c("-O", "--org"), type="character", default="Asko", dest="organism",
578 help="Organism name [default= %default]", metavar="character"),
579 make_option(c("-f", "--fileofcount"), type="character", default=NULL, dest="fileofcount",
580 help="file of counts [default= %default]", metavar="character"),
581 make_option(c("-G", "--col_genes"), type="integer", default=1, dest="col_genes",
582 help="col of ids in count files [default= %default]", metavar="integer"),
583 make_option(c("-C", "--col_counts"), type="integer", default=7,dest="col_counts",
584 help="col of counts in count files [default= %default (featureCounts) ]", metavar="integer"),
585 make_option(c("-t", "--sep"), type="character", default="\t", dest="sep",
586 help="col separator [default= %default]", metavar="character"),
587 make_option(c("-a", "--annotation"), type="character", default="annotation.txt", dest="annotation_file",
588 help="annotation file [default= %default]", metavar="character"),
589 make_option(c("-s", "--sample"), type="character", default="Samples.txt", dest="sample_file",
590 help="Samples file [default= %default]", metavar="character"),
591 make_option(c("-c", "--contrasts"), type="character", default="Contrasts.txt",dest="contrast_file",
592 help="Contrasts file [default= %default]", metavar="character"),
593 make_option(c("-k", "--mk_context"), type="logical", default=FALSE,dest="mk_context",
594 help="generate automatically the context names [default= %default]", metavar="logical"),
595 make_option(c("-p", "--palette"), type="character", default="Set2", dest="palette",
596 help="Color palette (ggplot)[default= %default]", metavar="character"),
597 make_option(c("-R", "--regex"), type="logical", default=FALSE, dest="regex",
598 help="use regex when selecting/removing samples [default= %default]", metavar="logical"),
599 make_option(c("-S", "--select"), type="character", default=NULL, dest="select_sample",
600 help="selected samples [default= %default]", metavar="character"),
601 make_option(c("-r", "--remove"), type="character", default=NULL, dest="rm_sample",
602 help="removed samples [default= %default]", metavar="character"),
603 make_option(c("--th_cpm"), type="double", default=0.5, dest="threshold_cpm",
604 help="CPM's threshold [default= %default]", metavar="double"),
605 make_option(c("--rep"), type="integer", default=3, dest="replicate_cpm",
606 help="Minimum number of replicates [default= %default]", metavar="integer"),
607 make_option(c("--th_FDR"), type="double", default=0.05, dest="threshold_FDR",
608 help="FDR threshold [default= %default]", metavar="double"),
609 make_option(c("-n", "--normalization"), type="character", default="TMM", dest="normal_method",
610 help="normalization method (TMM/RLE/upperquartile/none) [default= %default]", metavar="character"),
611 make_option(c("--adj"), type="character", default="fdr", dest="p_adj_method",
612 help="p-value adjust method (holm/hochberg/hommel/bonferroni/BH/BY/fdr/none) [default= %default]", metavar="character"),
613 make_option("--glm", type="character", default="qlf", dest="glm",
614 help=" GLM method (lrt/qlf) [default= %default]", metavar="character"),
615 make_option(c("--lfc"), type="logical", default="TRUE", dest="logFC",
616 help="logFC in the summary table [default= %default]", metavar="logical"),
617 make_option("--fc", type="logical", default="TRUE", dest="FC",
618 help="FC in the summary table [default= %default]", metavar="logical"),
619 make_option(c("--lcpm"), type="logical", default="FALSE", dest="logCPM",
620 help="logCPm in the summary table [default= %default]", metavar="logical"),
621 make_option("--fdr", type="logical", default="TRUE", dest="FDR",
622 help="FDR in the summary table [default= %default]", metavar="logical"),
623 make_option("--lr", type="logical", default="FALSE", dest="LR",
624 help="LR in the summary table [default= %default]", metavar="logical"),
625 make_option(c("--sign"), type="logical", default="TRUE", dest="Sign",
626 help="Significance (1/0/-1) in the summary table [default= %default]", metavar="logical"),
627 make_option(c("--expr"), type="logical", default="TRUE", dest="Expression",
628 help="Significance expression in the summary table [default= %default]", metavar="logical"),
629 make_option(c("--mc"), type="logical", default="TRUE", dest="mean_counts",
630 help="Mean counts in the summary table [default= %default]", metavar="logical"),
631 make_option(c("--hm"), type="logical", default="TRUE", dest="heatmap",
632 help="generation of the expression heatmap [default= %default]", metavar="logical"),
633 make_option(c("--nh"), type="integer", default="50", dest="numhigh",
634 help="number of genes in the heatmap [default= %default]", metavar="integer")
635 )
636 opt_parser = OptionParser(option_list=option_list)
637 parameters = parse_args(opt_parser)
638
639 if(is.null(parameters$rm_sample) == FALSE ) {
640 str_replace_all(parameters$rm_sample, " ", "")
641 parameters$rm_sample<-strsplit2(parameters$rm_sample, ",")
642 }
643
644 if(is.null(parameters$select_sample) == FALSE ) {
645 str_replace_all(parameters$select_sample, " ", "")
646 parameters$select_sample<-strsplit2(parameters$select_sample, ",")
647 }
648
649 dir.create(parameters$out_dir)
650 return(parameters)
651 }