Mercurial > repos > genouest > askor_de
diff AskoR.R @ 0:ceef9bc6bbc7 draft
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/askor commit 08a187f91ba050d584e586d2fcc99d984dac607c
author | genouest |
---|---|
date | Thu, 12 Apr 2018 05:23:45 -0400 |
parents | |
children | 877d2be25a6a |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AskoR.R Thu Apr 12 05:23:45 2018 -0400 @@ -0,0 +1,651 @@ +asko3c <- function(data_list){ + asko<-list() + + ######### Condition ############ + + condition<-unique(data_list$samples$condition) # retrieval of different condition's names + col1<-which(colnames(data_list$samples)=="condition") # determination of number of the column "condition" + colcol<-which(colnames(data_list$samples)=="color") + if(is.null(parameters$fileofcount)){ + col2<-which(colnames(data_list$samples)=="file") # determination of number of the column "replicate" + column_name<-colnames(data_list$samples[,c(-col1,-col2,-colcol)]) # retrieval of column names needful to create the file condition + }else{column_name<-colnames(data_list$samples[,c(-col1,-colcol)])} + condition_asko<-data.frame(row.names=condition) # initialization of the condition's data frame + #level<-list() # initialization of the list will contain the level + # of each experimental factor + for (name in column_name){ # for each experimental factor : + # if(str_detect(name, "condition")){ # for the column of conditions, the level is fixed to 0 because + # level<-append(level, 0) # "condition" must be the first column of the data frame + # }else{ # + # level<-append(level, length(levels(data_list$samples[,name]))) # adding to the list the level of other experimental factors + # } + # + condition_asko$n<-NA # initialization of new column in the condition's data frame + colnames(condition_asko)[colnames(condition_asko)=="n"]<-name # to rename the new column with with the name of experimental factor + for(condition_name in condition){ # for each condition's names + condition_asko[condition_name,name]<-as.character(unique(data_list$samples[data_list$samples$condition==condition_name, name])) + } # filling the condition's data frame + } + # order_level<-order(unlist(level)) # list to vector + # condition_asko<-condition_asko[,order_level] # order columns according to their level + #asko$condition<-condition_asko # adding data frame of conditions to asko object + + #print(condition_asko) + + + #############contrast + context################## + i=0 + + contrast_asko<-data.frame(row.names = colnames(data_list$contrast)) # initialization of the contrast's data frame + contrast_asko$Contrast<-NA # all columns are created et initialized with + contrast_asko$context1<-NA # NA values + contrast_asko$context2<-NA # + + list_context<-list() # initialization of context and condition lists + list_condition<-list() # will be used to create the context data frame + if(parameters$mk_context==TRUE){ + for (contrast in colnames(data_list$contrast)){ # for each contrast : + i=i+1 # contrast data frame will be filled line by line + #print(contrast) + set_cond1<-row.names(data_list$contrast)[data_list$contrast[,contrast]>0] # retrieval of 1st set of condition's names implicated in a given contrast + set_cond2<-row.names(data_list$contrast)[data_list$contrast[,contrast]<0] # retrieval of 2nd set of condition's names implicated in a given contrast + parameters<-colnames(condition_asko) # retrieval of names of experimental factor + print(paste("set_cond1 : ", set_cond1, sep = "")) + # print(length(set_cond1)) + print(paste("set_cond2 : ", set_cond2, sep = "")) + # print(length(set_cond2)) + if(length(set_cond1)==1){complex1=F}else{complex1=T}# to determine if we have complex contrast (multiple conditions + if(length(set_cond2)==1){complex2=F}else{complex2=T}# compared to multiple conditions) or not + #print(complex1) + if(complex1==F && complex2==F){ # Case 1: one condition against one condition + contrast_asko[i,"context1"]<-set_cond1 # filling contrast data frame with the name of the 1st context + contrast_asko[i,"context2"]<-set_cond2 # filling contrast data frame with the name of the 2nd context + contrast_name<-paste(set_cond1,set_cond2, sep = "vs") # creation of contrast name by associating the names of contexts + contrast_asko[i,"Contrast"]<-contrast_name # filling contrast data frame with contrast name + list_context<-append(list_context, set_cond1) # + list_condition<-append(list_condition, set_cond1) # adding respectively to the lists "context" and "condition" the context name + list_context<-append(list_context, set_cond2) # and the condition name associated + list_condition<-append(list_condition, set_cond2) # + } + if(complex1==F && complex2==T){ # Case 2: one condition against multiple condition + contrast_asko[i,"context1"]<-set_cond1 # filling contrast data frame with the name of the 1st context + list_context<-append(list_context, set_cond1) # adding respectively to the lists "context" and "condition" the 1st context + list_condition<-append(list_condition, set_cond1) # name and the condition name associated + l=0 + # "common_factor" will contain the common experimental factors shared by + common_factor=list() # conditions belonging to the complex context + for (param_names in parameters){ # for each experimental factor + facteur<-unique(c(condition_asko[,param_names])) # retrieval of possible values for the experimental factor + l=l+1 # + for(value in facteur){ # for each possible values + verif<-unique(str_detect(set_cond2, value)) # verification of the presence of values in each condition contained in the set + if(length(verif)==1 && verif==TRUE){common_factor[l]<-value} # if verif contains only TRUE, value of experimental factor + } # is added as common factor + } + if(length(common_factor)>1){ # if there are several common factor + common_factor<-toString(common_factor) # the list is converted to string + contx<-str_replace(common_factor,", ","") + contx<-str_replace_all(contx, "NULL", "")}else{contx<-common_factor} # and all common factor are concatenated to become the name of context + contrast_asko[i,"context2"]<-contx # filling contrast data frame with the name of the 2nd context + contrast_name<-paste(set_cond1,contx, sep = "vs") # concatenation of context names to make the contrast name + contrast_asko[i,"Contrast"]<-contrast_name # filling contrast data frame with the contrast name + for(j in 1:length(set_cond2)){ # for each condition contained in the complex context (2nd): + list_context<-append(list_context, contx) # adding condition name with the context name associated + list_condition<-append(list_condition, set_cond2[j]) # to their respective list + } + } + if(complex1==T && complex2==F){ # Case 3: multiple conditions against one condition + contrast_asko[i,"context2"]<-set_cond2 # filling contrast data frame with the name of the 2nd context + list_context<-append(list_context, set_cond2) # adding respectively to the lists "context" and "condition" the 2nd context + list_condition<-append(list_condition, set_cond2) # name and the 2nd condition name associated + l=0 + # "common_factor" will contain the common experimental factors shared by + common_factor=list() # conditions belonging to the complex context + for (param_names in parameters){ # for each experimental factor: + facteur<-unique(c(condition_asko[,param_names])) # retrieval of possible values for the experimental factor + l=l+1 + for(value in facteur){ # for each possible values: + verif<-unique(str_detect(set_cond1, value)) # verification of the presence of values in each condition contained in the set + if(length(verif)==1 && verif==TRUE){common_factor[l]<-value} # if verif contains only TRUE, value of experimental factor + } # is added as common factor + } + if(length(common_factor)>1){ # if there are several common factor + common_factor<-toString(common_factor) # the list is converted to string + contx<-str_replace(common_factor,", ","") + contx<-str_replace_all(contx, "NULL", "")}else{contx<-common_factor} # and all common factor are concatenated to become the name of context + contrast_asko[i,"context1"]<-contx # filling contrast data frame with the name of the 1st context + contrast_name<-paste(contx,set_cond2, sep = "vs") # concatenation of context names to make the contrast name + contrast_asko[i,"Contrast"]<-contrast_name # filling contrast data frame with the contrast name + for(j in 1:length(set_cond1)){ # for each condition contained in the complex context (1st): + list_context<-append(list_context, contx) # adding condition name with the context name associated + list_condition<-append(list_condition, set_cond1[j]) # to their respective list + } + } + if(complex1==T && complex2==T){ # Case 4: multiple conditions against multiple conditions + m=0 # + n=0 # + common_factor1=list() # list of common experimental factors shared by conditions of the 1st context + common_factor2=list() # list of common experimental factors shared by conditions of the 2nd context + w=1 + for (param_names in parameters){ # for each experimental factor: + print(w) + w=w+1 + facteur<-unique(c(condition_asko[,param_names])) # retrieval of possible values for the experimental factor + print(paste("facteur : ", facteur, sep="")) + for(value in facteur){ # for each possible values: + print(value) + #print(class(value)) + #print(set_cond1) + verif1<-unique(str_detect(set_cond1, value)) # verification of the presence of values in each condition + # contained in the 1st context + verif2<-unique(str_detect(set_cond2, value)) # verification of the presence of values in each condition + # contained in the 2nd context + + if(length(verif1)==1 && verif1==TRUE){m=m+1;common_factor1[m]<-value} # if verif=only TRUE, value of experimental factor is added as common factor + if(length(verif2)==1 && verif2==TRUE){n=n+1;common_factor2[n]<-value} # if verif=only TRUE, value of experimental factor is added as common factor + } + } + print(paste("common_factor1 : ",common_factor1,sep="")) + print(paste("common_factor2 : ",common_factor2,sep="")) + + if(length(common_factor1)>1){ # if there are several common factor for conditions in the 1st context + common_factor1<-toString(common_factor1) # conversion list to string + contx1<-str_replace(common_factor1,", ","")}else{contx1<-common_factor1}# all common factor are concatenated to become the name of context + contx1<-str_replace_all(contx1, "NULL", "") + print(paste("contx1 : ", contx1, sep="")) + if(length(common_factor2)>1){ # if there are several common factor for conditions in the 2nd context + common_factor2<-toString(common_factor2) # conversion list to string + contx2<-str_replace(common_factor2,", ","")}else{contx2<-common_factor2}# all common factor are concatenated to become the name of context + contx2<-str_replace_all(contx2, "NULL", "") + print(paste("contx2 : ", contx2, sep="")) + contrast_asko[i,"context1"]<-contx1 # filling contrast data frame with the name of the 1st context + contrast_asko[i,"context2"]<-contx2 # filling contrast data frame with the name of the 2nd context + contrast_asko[i,"Contrast"]<-paste(contx1,contx2, sep = "vs") # filling contrast data frame with the name of the contrast + for(j in 1:length(set_cond1)){ # for each condition contained in the complex context (1st): + list_context<-append(list_context, contx1) # verification of the presence of values in each condition + list_condition<-append(list_condition, set_cond1[j]) # contained in the 1st context + } + for(j in 1:length(set_cond2)){ # for each condition contained in the complex context (2nd): + list_context<-append(list_context, contx2) # verification of the presence of values in each condition + list_condition<-append(list_condition, set_cond2[j]) # contained in the 1st context + } + } + } + } + else{ + for (contrast in colnames(data_list$contrast)){ + i=i+1 + contexts=strsplit2(contrast,"vs") + contrast_asko[i,"Contrast"]<-contrast + contrast_asko[i,"context1"]=contexts[1] + contrast_asko[i,"context2"]=contexts[2] + set_cond1<-row.names(data_list$contrast)[data_list$contrast[,contrast]>0] + set_cond2<-row.names(data_list$contrast)[data_list$contrast[,contrast]<0] + for (cond1 in set_cond1){ +# print(contexts[1]) + # print(cond1) + list_context<-append(list_context, contexts[1]) + list_condition<-append(list_condition, cond1) + } + for (cond2 in set_cond2){ + list_context<-append(list_context, contexts[2]) + list_condition<-append(list_condition, cond2) + } + } + } + + list_context<-unlist(list_context) # conversion list to vector + list_condition<-unlist(list_condition) # conversion list to vector +# print(list_condition) +# print(list_context) + context_asko<-data.frame(list_context,list_condition) # creation of the context data frame + context_asko<-unique(context_asko) + colnames(context_asko)[colnames(context_asko)=="list_context"]<-"context" # header formatting for askomics + colnames(context_asko)[colnames(context_asko)=="list_condition"]<-"condition" # header formatting for askomics + #asko$contrast<-contrast_asko # adding context data frame to asko object + #asko$context<-context_asko # adding context data frame to asko object + asko<-list("condition"=condition_asko, "contrast"=contrast_asko, "context"=context_asko) + colnames(context_asko)[colnames(context_asko)=="context"]<-"Context" # header formatting for askomics + colnames(context_asko)[colnames(context_asko)=="condition"]<-"has@Condition" # header formatting for askomics + colnames(contrast_asko)[colnames(contrast_asko)=="context1"]<-paste("context1_of", "Context", sep="@") # header formatting for askomics + colnames(contrast_asko)[colnames(contrast_asko)=="context2"]<-paste("context2_of", "Context", sep="@") # header formatting for askomics + + ######## Files creation ######## + + write.table(data.frame("Condition"=row.names(condition_asko),condition_asko), paste0(parameters$out_dir,"/condition.asko.txt"), sep = parameters$sep, row.names = F, quote=F) # creation of condition file for asko + write.table(context_asko, paste0(parameters$out_dir,"/context.asko.txt"), sep=parameters$sep, col.names = T, row.names = F,quote=F) # creation of context file for asko + write.table(contrast_asko, paste0(parameters$out_dir,"/contrast.asko.txt"), sep=parameters$sep, col.names = T, row.names = F, quote=F) # creation of contrast file for asko + return(asko) +} + +.NormCountsMean <- function(glmfit, ASKOlist, context){ + + lib_size_norm<-glmfit$samples$lib.size*glmfit$samples$norm.factors # normalization computation of all library sizes + set_condi<-ASKOlist$context$condition[ASKOlist$context$context==context] # retrieval of condition names associated to context + + for (condition in set_condi){ + sample_name<-rownames(glmfit$samples[glmfit$samples$condition==condition,]) # retrieval of the replicate names associated to conditions + subset_counts<-data.frame(row.names = row.names(glmfit$counts)) # initialization of data frame as subset of counts table + for(name in sample_name){ + lib_sample_norm<-glmfit$samples[name,"lib.size"]*glmfit$samples[name,"norm.factors"] # normalization computation of sample library size + subset_counts$c<-glmfit$counts[,name] # addition in subset of sample counts column + subset_counts$c<-subset_counts$c*mean(lib_size_norm)/lib_sample_norm # normalization computation of sample counts + colnames(subset_counts)[colnames(subset_counts)=="c"]<-name # to rename the column with the condition name + } + mean_counts<-rowSums(subset_counts)/ncol(subset_counts) # computation of the mean + ASKOlist$stat.table$mean<-mean_counts # subset integration in the glm_result table + colnames(ASKOlist$stat.table)[colnames(ASKOlist$stat.table)=="mean"]<-paste(context,condition,sep = "/") + } # to rename the column with the context name + return(ASKOlist$stat.table) # return the glm object +} + +AskoStats <- function (glm_test, fit, contrast, ASKOlist, dge,parameters){ + contrasko<-ASKOlist$contrast$Contrast[row.names(ASKOlist$contrast)==contrast] # to retrieve the name of contrast from Asko object + contx1<-ASKOlist$contrast$context1[row.names(ASKOlist$contrast)==contrast] # to retrieve the name of 1st context from Asko object + contx2<-ASKOlist$contrast$context2[row.names(ASKOlist$contrast)==contrast] # to retrieve the name of 2nd context from Asko object + + ASKO_stat<-glm_test$table + ASKO_stat$Test_id<-paste(contrasko, rownames(ASKO_stat), sep = "_") # addition of Test_id column = unique ID + ASKO_stat$contrast<-contrasko # addition of the contrast of the test + ASKO_stat$gene <- row.names(ASKO_stat) # addition of gene column = gene ID + ASKO_stat$FDR<-p.adjust(ASKO_stat$PValue, method=parameters$p_adj_method) # computation of False Discovery Rate + + ASKO_stat$Significance=0 # Between context1 and context2 : + ASKO_stat$Significance[ASKO_stat$logFC< 0 & ASKO_stat$FDR<=parameters$threshold_FDR] = -1 # Significance values = -1 for down regulated genes + ASKO_stat$Significance[ASKO_stat$logFC> 0 & ASKO_stat$FDR<=parameters$threshold_FDR] = 1 # Significance values = 1 for up regulated genes + + if(parameters$Expression==TRUE){ + ASKO_stat$Expression=NA # addition of column "expression" + ASKO_stat$Expression[ASKO_stat$Significance==-1]<-paste(contx1, contx2, sep="<") # the value of attribute "Expression" is a string + ASKO_stat$Expression[ASKO_stat$Significance==1]<-paste(contx1, contx2, sep=">") # this attribute is easier to read the Significance + ASKO_stat$Expression[ASKO_stat$Significance==0]<-paste(contx1, contx2, sep="=") # of expression between two contexts + } + if(parameters$logFC==T){cola="logFC"}else{cola=NULL} # + if(parameters$FC==T){colb="FC";ASKO_stat$FC <- 2^abs(ASKO_stat$logFC)}else{colb=NULL} # computation of Fold Change from log2FC + if(parameters$Sign==T){colc="Significance"} # + if(parameters$logCPM==T){cold="logCPM"}else{cold=NULL} # + if(parameters$LR==T){cole="LR"}else{cole=NULL} # + if(parameters$FDR==T){colf="FDR"}else{colf=NULL} + + ASKOlist$stat.table<-ASKO_stat[,c("Test_id","contrast","gene",cola,colb,"PValue", # adding table "stat.table" to the ASKOlist + "Expression",colc,cold,cole,colf)] + if(parameters$mean_counts==T){ # computation of the mean of normalized counts for conditions + ASKOlist$stat.table<-.NormCountsMean(fit, ASKOlist, contx1) # in the 1st context + ASKOlist$stat.table<-.NormCountsMean(fit, ASKOlist, contx2) # in the 2nd context + } + print(table(ASKO_stat$Expression)) + colnames(ASKOlist$stat.table)[colnames(ASKOlist$stat.table)=="gene"] <- paste("is", "gene", sep="@") # header formatting for askomics + colnames(ASKOlist$stat.table)[colnames(ASKOlist$stat.table)=="contrast"] <- paste("measured_in", "Contrast", sep="@") # header formatting for askomics + o <- order(ASKOlist$stat.table$FDR) # ordering genes by FDR value + ASKOlist$stat.table<-ASKOlist$stat.table[o,] + # + dir.create(parameters$out_dir) + write.table(ASKOlist$stat.table,paste0(parameters$out_dir,"/", parameters$organism, contrasko, ".txt"), # + sep=parameters$sep, col.names = T, row.names = F, quote=FALSE) + + if(parameters$heatmap==TRUE){ + cpm_gstats<-cpm(dge, log=TRUE)[o,][1:parameters$numhigh,] + heatmap.2(cpm_gstats, cexRow=0.5, cexCol=0.8, scale="row", labCol=dge$samples$Name, xlab=contrast, Rowv = FALSE, dendrogram="col") + } + + return(ASKOlist) + +} + +loadData <- function(parameters){ + + #####samples##### + samples<-read.table(parameters$sample_file, header=TRUE, sep="\t", row.names=1, comment.char = "#") #prise en compte des r?sultats de T2 + if(is.null(parameters$select_sample)==FALSE){ + if(parameters$regex==TRUE){ + selected<-c() + for(sel in parameters$select_sample){ + select<-grep(sel, rownames(samples)) + if(is.null(selected)){selected=select}else{selected<-append(selected, select)} + } + samples<-samples[selected,] + }else{samples<-samples[parameters$select_sample,]} + } + + if(is.null(parameters$rm_sample)==FALSE){ + if(parameters$regex==TRUE){ + for(rm in parameters$rm_sample){ + removed<-grep(rm, rownames(samples)) +# print(removed) + if(length(removed!=0)){samples<-samples[-removed,]} + } + }else{ + for (rm in parameters$rm_sample) { + rm2<-match(rm, rownames(samples)) + samples<-samples[-rm2,] + } + } + } + condition<-unique(samples$condition) + #print(condition) + color<-brewer.pal(length(condition), parameters$palette) + #print(color) + samples$color<-NA + j=0 + for(name in condition){ + j=j+1 + samples$color[samples$condition==name]<-color[j] + } + #print(samples) + + + #####counts##### + if(is.null(parameters$fileofcount)){ + dge<-readDGE(samples$file, labels=rownames(samples), columns=c(parameters$col_genes,parameters$col_counts), header=TRUE, comment.char="#") + dge<-DGEList(counts=dge$counts, samples=samples) + # dge$samples=samples + #countT<-dge$counts + # if(is.null(parameters$select_sample)==FALSE){ + # slct<-grep(parameters$select_sample, colnames(countT)) + # dge$counts<-dge$counts[,slct] + # dge$samples<-dge$samples[,slct] + # } + # if(is.null(parameters$rm_sample)==FALSE){ + # rmc<-grep(parameters$rm_count, colnames(dge$counts)) + # dge$counts<-dge$counts[,-rmc] + # print(ncol(dge$counts)) + # rms<-grep(parameters$rm_sample, row.names(dge$samples)) + # dge$samples<-dge$samples[-rms,] + # } + }else { + if(grepl(".csv", parameters$fileofcount)==TRUE){ + count<-read.csv(parameters$fileofcount, header=TRUE, sep = "\t", row.names = parameters$col_genes) + } + else{ + count<-read.table(parameters$fileofcount, header=TRUE, sep = "\t", row.names = parameters$col_genes, comment.char = "") + } + select_counts<-row.names(samples) + #countT<-count[,c(parameters$col_counts:length(colnames(count)))] + countT<-count[,select_counts] + dge<-DGEList(counts=countT, samples=samples) + # if(is.null(parameters$select_sample)==FALSE){ + # slct<-grep(parameters$select_sample, colnames(countT)) + # countT<-countT[,slct] + # } + # if(is.null(parameters$rm_count)==FALSE){ + # rms<-grep(parameters$rm_count, colnames(countT)) + # #print(rms) + # countT<-countT[,-rms] + # + # } + #print(nrow(samples)) + #print(ncol(countT)) + } + + #####design##### + Group<-factor(samples$condition) + designExp<-model.matrix(~0+Group) + rownames(designExp) <- row.names(samples) + colnames(designExp) <- levels(Group) + + #####contrast##### + contrastab<-read.table(parameters$contrast_file, sep="\t", header=TRUE, row.names = 1, comment.char="#", stringsAsFactors = FALSE) + + rmcol<-list() + for(condition_name in row.names(contrastab)){ + test<-match(condition_name, colnames(designExp),nomatch = 0) + if(test==0){ + print(condition_name) + rm<-grep("0", contrastab[condition_name,], invert = T) + print(rm) + if(is.null(rmcol)){rmcol=rm}else{rmcol<-append(rmcol, rm)} + } + } + if (length(rmcol)> 0){ + rmcol<-unlist(rmcol) + rmcol<-unique(rmcol) + rmcol=-rmcol + contrastab<-contrastab[,rmcol] + } + + ord<-match(colnames(designExp),row.names(contrastab), nomatch = 0) + contrast_table<-contrastab[ord,] + colnum<-c() + + for(contrast in colnames(contrast_table)){ + set_cond1<-row.names(contrast_table)[contrast_table[,contrast]=="+"] + #print(set_cond1) + set_cond2<-row.names(contrast_table)[contrast_table[,contrast]=="-"] + #print(set_cond2) + if(length(set_cond1)!=length(set_cond2)){ + contrast_table[,contrast][contrast_table[,contrast]=="+"]=signif(1/length(set_cond1),digits = 2) + contrast_table[,contrast][contrast_table[,contrast]=="-"]=signif(-1/length(set_cond2),digits = 2) + } + else { + contrast_table[,contrast][contrast_table[,contrast]=="+"]=1 + contrast_table[,contrast][contrast_table[,contrast]=="-"]=-1 + } + contrast_table[,contrast]<-as.numeric(contrast_table[,contrast]) + } + + #####annotation##### + #annotation <- read.csv(parameters$annotation_file, header = T, sep = '\t', quote = "", row.names = 1) + + #data<-list("counts"=countT, "samples"=samples, "contrast"=contrast_table, "annot"=annotation, "design"=designExp) + #print(countT) + rownames(dge$samples)<-rownames(samples) # replace the renaming by files + data<-list("dge"=dge, "samples"=samples, "contrast"=contrast_table, "design"=designExp) + return(data) +} + +GEfilt <- function(dge_list, parameters){ + cpm<-cpm(dge_list) + logcpm<-cpm(dge_list, log=TRUE) + colnames(logcpm)<-rownames(dge_list$samples) + nsamples <- ncol(dge_list) # cr?ation nouveau plot + plot(density(logcpm[,1]), + col=as.character(dge_list$samples$color[1]), # plot exprimant la densit? de chaque g?ne + lwd=1, + ylim=c(0,0.21), + las=2, + main="A. Raw data", + xlab="Log-cpm") # en fonction de leurs valeurs d'expression + abline(v=0, lty=3) + for (i in 2:nsamples){ # on boucle sur chaque condition restante + den<-density(logcpm[,i]) # et les courbes sont rajout?es dans le plot + lines(den$x, col=as.character(dge_list$samples$color[i]), den$y, lwd=1) # + } + legend("topright", rownames(dge_list$samples), + text.col=as.character(dge_list$samples$color), + bty="n", + text.width=6, + cex=0.5) + # rowSums compte le nombre de score (cases) pour chaque colonne Sup ? 0.5 + keep.exprs <- rowSums(cpm>parameters$threshold_cpm)>=parameters$replicate_cpm # en ajoutant >=3 cela donne un test conditionnel + filtered_counts <- dge_list[keep.exprs,,keep.lib.sizes=F] # si le comptage respecte la condition alors renvoie TRUE + filtered_cpm<-cpm(filtered_counts$counts, log=TRUE) + + plot(density(filtered_cpm[,1]), + col=as.character(dge_list$samples$color[1]), + lwd=2, + ylim=c(0,0.21), + las=2, + main="B. Filtered data", xlab="Log-cpm") + abline(v=0, lty=3) + for (i in 2:nsamples){ + den <- density(filtered_cpm[,i]) + lines(den$x,col=as.character(dge_list$samples$color[i]), den$y, lwd=1) + } + legend("topright", rownames(dge_list$samples), + text.col=as.character(dge_list$samples$col), + bty="n", + text.width=6, + cex=0.5) + return(filtered_counts) +} + +GEnorm <- function(filtered_GE, parameters){ + filtered_cpm <- cpm(filtered_GE, log=TRUE) #nouveau calcul Cpm sur donn?es filtr?es, si log=true alors valeurs cpm en log2 + colnames(filtered_cpm)<-rownames(filtered_GE$samples) + boxplot(filtered_cpm, + col=filtered_GE$samples$color, #boxplot des scores cpm non normalis?s + main="A. Before normalization", + cex.axis=0.5, + las=2, + ylab="Log-cpm") + + norm_GE<-calcNormFactors(filtered_GE, method = parameters$normal_method) # normalisation de nos comptages par le methode TMM, estimation du taux de production d'un ARN # en estimant l'?chelle des facteurs entre echantillons -> but : pouvoir comparer nos ech entre eux + + logcpm_norm <- cpm(norm_GE, log=TRUE) + colnames(logcpm_norm)<-rownames(filtered_GE$samples) + boxplot(logcpm_norm, + col=filtered_GE$samples$color, + main="B. After normalization", + cex.axis=0.5, + las=2, + ylab="Log-cpm") + + return(norm_GE) +} + +GEcorr <- function(dge, parameters){ + lcpm<-cpm(dge, log=TRUE) + colnames(lcpm)<-rownames(dge$samples) + cormat<-cor(lcpm) + # color<- colorRampPalette(c("yellow", "white", "green"))(20) + color<-colorRampPalette(c("black","red","yellow","white"),space="rgb")(28) + heatmap(cormat, col=color, symm=TRUE,RowSideColors =as.character(dge$samples$color), ColSideColors = as.character(dge$samples$color)) + #MDS + mds <- cmdscale(dist(t(lcpm)),k=3, eig=TRUE) + eigs<-round((mds$eig)*100/sum(mds$eig[mds$eig>0]),2) + + mds1<-ggplot(as.data.frame(mds$points), aes(V1, V2, label = rownames(mds$points))) + labs(title="MDS Axes 1 and 2") + geom_point(color =as.character(dge$samples$color) ) + xlab(paste('dim 1 [', eigs[1], '%]')) +ylab(paste('dim 2 [', eigs[2], "%]")) + geom_label_repel(aes(label = rownames(mds$points)), color = 'black',size = 3.5) + print(mds1) + #ggsave("mds_corr1-2.tiff") + #ggtitle("MDS Axes 2 and 3") + mds2<-ggplot(as.data.frame(mds$points), aes(V2, V3, label = rownames(mds$points))) + labs(title="MDS Axes 2 and 3") + geom_point(color =as.character(dge$samples$color) ) + xlab(paste('dim 2 [', eigs[2], '%]')) +ylab(paste('dim 3 [', eigs[3], "%]")) + geom_label_repel(aes(label = rownames(mds$points)), color = 'black',size = 3.5) + print(mds2) + # ggtitle("MDS Axes 1 and 3") + #ggsave("mds_corr2-3.tiff") + mds3<-ggplot(as.data.frame(mds$points), aes(V1, V3, label = rownames(mds$points))) + labs(title="MDS Axes 1 and 3") + geom_point(color =as.character(dge$samples$color) ) + xlab(paste('dim 1 [', eigs[1], '%]')) +ylab(paste('dim 3 [', eigs[3], "%]")) + geom_label_repel(aes(label = rownames(mds$points)), color = 'black',size = 3.5) + print(mds3) + #ggsave("mds_corr1-3.tiff") +} + +DEanalysis <- function(norm_GE, data_list, asko_list, parameters){ + + normGEdisp <- estimateDisp(norm_GE, data_list$design) + if(parameters$glm=="lrt"){ + fit <- glmFit(normGEdisp, data_list$design, robust = T) + } + if(parameters$glm=="qlf"){ + fit <- glmQLFit(normGEdisp, data_list$design, robust = T) + plotQLDisp(fit) + } + + #plotMD.DGEGLM(fit) + #plotBCV(norm_GE) + + #sum<-norm_GE$genes + for (contrast in colnames(data_list$contrast)){ + print(asko_list$contrast$Contrast[contrast]) + if(parameters$glm=="lrt"){ + glm_test<-glmLRT(fit, contrast=data_list$contrast[,contrast]) + } + if(parameters$glm=="qlf"){ + glm_test<-glmQLFTest(fit, contrast=data_list$contrast[,contrast]) + } + + #sum[,contrast]<-decideTestsDGE(glm, adjust.method = parameters$p_adj_method, lfc=1) + #print(table(sum[,contrast])) + AskoStats(glm_test, fit, contrast, asko_list,normGEdisp,parameters) + } +} + +Asko_start <-function(){ + library(limma) + library(statmod) + library(edgeR) + library(ggplot2) + library(RColorBrewer) + library(ggrepel) + library(gplots) + library(stringr) + library(optparse) + option_list = list( + make_option(c("-o", "--out"), type="character", default="out.pdf",dest="output_pdf", + help="output file name [default= %default]", metavar="character"), + make_option(c("-d", "--dir"), type="character", default=".",dest="dir_path", + help="data directory path [default= %default]", metavar="character"), + make_option("--outdir", type="character", default=".",dest="out_dir", + help="outputs directory [default= %default]", metavar="character"), + make_option(c("-O", "--org"), type="character", default="Asko", dest="organism", + help="Organism name [default= %default]", metavar="character"), + make_option(c("-f", "--fileofcount"), type="character", default=NULL, dest="fileofcount", + help="file of counts [default= %default]", metavar="character"), + make_option(c("-G", "--col_genes"), type="integer", default=1, dest="col_genes", + help="col of ids in count files [default= %default]", metavar="integer"), + make_option(c("-C", "--col_counts"), type="integer", default=7,dest="col_counts", + help="col of counts in count files [default= %default (featureCounts) ]", metavar="integer"), + make_option(c("-t", "--sep"), type="character", default="\t", dest="sep", + help="col separator [default= %default]", metavar="character"), + make_option(c("-a", "--annotation"), type="character", default="annotation.txt", dest="annotation_file", + help="annotation file [default= %default]", metavar="character"), + make_option(c("-s", "--sample"), type="character", default="Samples.txt", dest="sample_file", + help="Samples file [default= %default]", metavar="character"), + make_option(c("-c", "--contrasts"), type="character", default="Contrasts.txt",dest="contrast_file", + help="Contrasts file [default= %default]", metavar="character"), + make_option(c("-k", "--mk_context"), type="logical", default=FALSE,dest="mk_context", + help="generate automatically the context names [default= %default]", metavar="logical"), + make_option(c("-p", "--palette"), type="character", default="Set2", dest="palette", + help="Color palette (ggplot)[default= %default]", metavar="character"), + make_option(c("-R", "--regex"), type="logical", default=FALSE, dest="regex", + help="use regex when selecting/removing samples [default= %default]", metavar="logical"), + make_option(c("-S", "--select"), type="character", default=NULL, dest="select_sample", + help="selected samples [default= %default]", metavar="character"), + make_option(c("-r", "--remove"), type="character", default=NULL, dest="rm_sample", + help="removed samples [default= %default]", metavar="character"), + make_option(c("--th_cpm"), type="double", default=0.5, dest="threshold_cpm", + help="CPM's threshold [default= %default]", metavar="double"), + make_option(c("--rep"), type="integer", default=3, dest="replicate_cpm", + help="Minimum number of replicates [default= %default]", metavar="integer"), + make_option(c("--th_FDR"), type="double", default=0.05, dest="threshold_FDR", + help="FDR threshold [default= %default]", metavar="double"), + make_option(c("-n", "--normalization"), type="character", default="TMM", dest="normal_method", + help="normalization method (TMM/RLE/upperquartile/none) [default= %default]", metavar="character"), + make_option(c("--adj"), type="character", default="fdr", dest="p_adj_method", + help="p-value adjust method (holm/hochberg/hommel/bonferroni/BH/BY/fdr/none) [default= %default]", metavar="character"), + make_option("--glm", type="character", default="qlf", dest="glm", + help=" GLM method (lrt/qlf) [default= %default]", metavar="character"), + make_option(c("--lfc"), type="logical", default="TRUE", dest="logFC", + help="logFC in the summary table [default= %default]", metavar="logical"), + make_option("--fc", type="logical", default="TRUE", dest="FC", + help="FC in the summary table [default= %default]", metavar="logical"), + make_option(c("--lcpm"), type="logical", default="FALSE", dest="logCPM", + help="logCPm in the summary table [default= %default]", metavar="logical"), + make_option("--fdr", type="logical", default="TRUE", dest="FDR", + help="FDR in the summary table [default= %default]", metavar="logical"), + make_option("--lr", type="logical", default="FALSE", dest="LR", + help="LR in the summary table [default= %default]", metavar="logical"), + make_option(c("--sign"), type="logical", default="TRUE", dest="Sign", + help="Significance (1/0/-1) in the summary table [default= %default]", metavar="logical"), + make_option(c("--expr"), type="logical", default="TRUE", dest="Expression", + help="Significance expression in the summary table [default= %default]", metavar="logical"), + make_option(c("--mc"), type="logical", default="TRUE", dest="mean_counts", + help="Mean counts in the summary table [default= %default]", metavar="logical"), + make_option(c("--hm"), type="logical", default="TRUE", dest="heatmap", + help="generation of the expression heatmap [default= %default]", metavar="logical"), + make_option(c("--nh"), type="integer", default="50", dest="numhigh", + help="number of genes in the heatmap [default= %default]", metavar="integer") + ) + opt_parser = OptionParser(option_list=option_list) + parameters = parse_args(opt_parser) + + if(is.null(parameters$rm_sample) == FALSE ) { + str_replace_all(parameters$rm_sample, " ", "") + parameters$rm_sample<-strsplit2(parameters$rm_sample, ",") + } + + if(is.null(parameters$select_sample) == FALSE ) { + str_replace_all(parameters$select_sample, " ", "") + parameters$select_sample<-strsplit2(parameters$select_sample, ",") + } + + dir.create(parameters$out_dir) + return(parameters) +}