proteore_go_terms_profiles_comparison: GO_prof

comparison GO_prof_comp.R @ 0:fe80e3b6b5c2 draft default tip

planemo upload commit b8671ffe2e12dc6612b971a3e6e1dc71496aefd0-dirty

author	proteore
date	Fri, 24 Jan 2020 10:34:33 -0500
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:fe80e3b6b5c2
+options(warn=-1)  #TURN OFF WARNINGS !!!!!!
+suppressMessages(library(clusterProfiler,quietly = TRUE))
+suppressMessages(library(plyr, quietly = TRUE))
+suppressMessages(library(ggplot2, quietly = TRUE))
+suppressMessages(library(DOSE, quietly = TRUE))
+#return the number of character from the longest description found (from the 10 first)
+max_str_length_10_first <- function(vector){
+vector <- as.vector(vector)
+nb_description = length(vector)
+if (nb_description >= 10){nb_description=10}
+return(max(nchar(vector[1:nb_description])))
+}
+str2bool <- function(x){
+if (any(is.element(c("t","true"),tolower(x)))){
+return (TRUE)
+}else if (any(is.element(c("f","false"),tolower(x)))){
+return (FALSE)
+}else{
+return(NULL)
+}
+}
+get_args <- function(){
+## Collect arguments
+args <- commandArgs(TRUE)
+## Default setting when no arguments passed
+if(length(args) < 1) {
+args <- c("--help")
+}
+## Help section
+if("--help" %in% args) {
+cat("Selection and Annotation HPA
+Arguments:
+--inputtype1: type of input (list of id or filename)
+--inputtype2: type of input (list of id or filename)
+--input1: input1
+--input2: input2
+--column1: the column number which you would like to apply...
+--column2: the column number which you would like to apply...
+--header1: true/false if your file contains a header
+--header2: true/false if your file contains a header
+--ont: ontology to use
+--lev: ontology level
+--org: organism db package
+--list_name1: name of the first list
+--list_name2: name of the second list \n")
+q(save="no")
+}
+parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
+argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
+args <- as.list(as.character(argsDF$V2))
+names(args) <- argsDF$V1
+return(args)
+}
+get_ids=function(inputtype, input, ncol, header) {
+if (inputtype == "text") {
+ids = strsplit(input, "[ \t\n]+")[[1]]
+} else if (inputtype == "file") {
+header=str2bool(header)
+ncol=get_cols(ncol)
+csv = read.csv(input,header=header, sep="\t", as.is=T)
+ids=csv[,ncol]
+}
+ids = unlist(strsplit(as.character(ids),";"))
+ids = ids[which(!is.na(ids))]
+return(ids)
+}
+str2bool <- function(x){
+if (any(is.element(c("t","true"),tolower(x)))){
+return (TRUE)
+}else if (any(is.element(c("f","false"),tolower(x)))){
+return (FALSE)
+}else{
+return(NULL)
+}
+}
+check_ids <- function(vector,type) {
+uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
+entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
+if (type == "entrez")
+return(grepl(entrez_id,vector))
+else if (type == "uniprot") {
+return(grepl(uniprot_pattern,vector))
+}
+}
+#res.cmp@compareClusterResult$Description <- sapply(as.vector(res.cmp@compareClusterResult$Description), function(x) {ifelse(nchar(x)>50, substr(x,1,50),x)},USE.NAMES = FALSE)
+fortify.compareClusterResult <- function(res.cmp, showCategory=30, by="geneRatio", split=NULL, includeAll=TRUE) {
+clProf.df <- as.data.frame(res.cmp)
+.split <- split
+## get top 5 (default) categories of each gene cluster.
+if (is.null(showCategory)) {
+result <- clProf.df
+} else {
+Cluster <- NULL # to satisfy codetools
+topN <- function(res, showCategory) {
+ddply(.data = res, .variables = .(Cluster), .fun = function(df, N) {
+if (length(df$Count) > N) {
+if (any(colnames(df) == "pvalue")) {
+idx <- order(df$pvalue, decreasing=FALSE)[1:N]
+} else {
+## for groupGO
+idx <- order(df$Count, decreasing=T)[1:N]
+}
+return(df[idx,])
+} else {
+return(df)
+}
+},
+N=showCategory
+)
+}
+if (!is.null(.split) && .split %in% colnames(clProf.df)) {
+lres <- split(clProf.df, as.character(clProf.df[, .split]))
+lres <- lapply(lres, topN, showCategory = showCategory)
+result <- do.call('rbind', lres)
+} else {
+result <- topN(clProf.df, showCategory)
+}
+}
+ID <- NULL
+if (includeAll == TRUE) {
+result = subset(clProf.df, ID %in% result$ID)
+}
+## remove zero count
+result$Description <- as.character(result$Description) ## un-factor
+GOlevel <- result[,c("ID", "Description")] ## GO ID and Term
+GOlevel <- unique(GOlevel)
+result <- result[result$Count != 0, ]
+result$Description <- factor(result$Description,levels=rev(GOlevel[,2]))
+if (by=="rowPercentage") {
+Description <- Count <- NULL # to satisfy codetools
+result <- ddply(result,.(Description),transform,Percentage = Count/sum(Count),Total = sum(Count))
+## label GO Description with gene counts.
+x <- mdply(result[, c("Description", "Total")], paste, sep=" (")
+y <- sapply(x[,3], paste, ")", sep="")
+result$Description <- y
+## restore the original order of GO Description
+xx <- result[,c(2,3)]
+xx <- unique(xx)
+rownames(xx) <- xx[,1]
+Termlevel <- xx[as.character(GOlevel[,1]),2]
+##drop the *Total* column
+result <- result[, colnames(result) != "Total"]
+result$Description <- factor(result$Description, levels=rev(Termlevel))
+} else if (by == "count") {
+## nothing
+} else if (by == "geneRatio") { ##default
+gsize <- as.numeric(sub("/\\d+$", "", as.character(result$GeneRatio)))
+gcsize <- as.numeric(sub("^\\d+/", "", as.character(result$GeneRatio)))
+result$GeneRatio = gsize/gcsize
+cluster <- paste(as.character(result$Cluster),"\n", "(", gcsize, ")", sep="")
+lv <- unique(cluster)[order(as.numeric(unique(result$Cluster)))]
+result$Cluster <- factor(cluster, levels = lv)
+} else {
+## nothing
+}
+return(result)
+}
+##function plotting.clusteProfile from clusterProfiler pkg
+plotting.clusterProfile <- function(clProf.reshape.df,x = ~Cluster,type = "dot", colorBy = "p.adjust",by = "geneRatio",title="",font.size=12) {
+Description <- Percentage <- Count <- Cluster <- GeneRatio <- p.adjust <- pvalue <- NULL # to
+if (type == "dot") {
+if (by == "rowPercentage") {
+p <- ggplot(clProf.reshape.df,
+aes_(x = x, y = ~Description, size = ~Percentage))
+} else if (by == "count") {
+p <- ggplot(clProf.reshape.df,
+aes_(x = x, y = ~Description, size = ~Count))
+} else if (by == "geneRatio") { ##DEFAULT
+p <- ggplot(clProf.reshape.df,
+aes_(x = x, y = ~Description, size = ~GeneRatio))
+} else {
+## nothing here
+}
+if (any(colnames(clProf.reshape.df) == colorBy)) {
+p <- p +
+geom_point() +
+aes_string(color=colorBy) +
+scale_color_continuous(low="red", high="blue", guide=guide_colorbar(reverse=TRUE))
+## scale_color_gradientn(guide=guide_colorbar(reverse=TRUE), colors = enrichplot:::sig_palette)
+} else {
+p <- p + geom_point(colour="steelblue")
+}
+}
+p <- p + xlab("") + ylab("") + ggtitle(title) +
+theme_dose(font.size)
+## theme(axis.text.x = element_text(colour="black", size=font.size, vjust = 1)) +
+##     theme(axis.text.y = element_text(colour="black",
+##           size=font.size, hjust = 1)) +
+##               ggtitle(title)+theme_bw()
+## p <- p + theme(axis.text.x = element_text(angle=angle.axis.x,
+##                    hjust=hjust.axis.x,
+##                    vjust=vjust.axis.x))
+return(p)
+}
+make_dotplot<-function(res.cmp,ontology) {
+dfok<-fortify.compareClusterResult(res.cmp)
+dfok$Description <- sapply(as.vector(dfok$Description), function(x) {ifelse(nchar(x)>50, substr(x,1,50),x)},USE.NAMES = FALSE)
+p<-plotting.clusterProfile(dfok, title="")
+#plot(p, type="dot") #
+output_path= paste("GO_profiles_comp_",ontology,".png",sep="")
+png(output_path,height = 720, width = 600)
+pl <- plot(p, type="dot")
+print(pl)
+dev.off()
+}
+get_cols <-function(input_cols) {
+input_cols <- gsub("c","",gsub("C","",gsub(" ","",input_cols)))
+if (grepl(":",input_cols)) {
+first_col=unlist(strsplit(input_cols,":"))[1]
+last_col=unlist(strsplit(input_cols,":"))[2]
+cols=first_col:last_col
+} else {
+cols = as.integer(unlist(strsplit(input_cols,",")))
+}
+return(cols)
+}
+#to check
+cmp.GO <- function(l,fun="groupGO",orgdb, ontology, level=3, readable=TRUE) {
+cmpGO<-compareCluster(geneClusters = l,
+fun=fun,
+OrgDb = orgdb,
+ont=ontology,
+level=level,
+readable=TRUE)
+return(cmpGO)
+}
+check_ids <- function(vector,type) {
+uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
+entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
+if (type == "entrez")
+return(grepl(entrez_id,vector))
+else if (type == "uniprot") {
+return(grepl(uniprot_pattern,vector))
+}
+}
+main = function() {
+#to get the args of the command line
+args=get_args()
+ids1<-get_ids(args$inputtype1, args$input1, args$column1, args$header1)
+ids2<-get_ids(args$inputtype2, args$input2, args$column2, args$header2)
+ont = strsplit(args$ont, ",")[[1]]
+lev=as.integer(args$lev)
+org=args$org
+#load annot package
+suppressMessages(library(args$org, character.only = TRUE, quietly = TRUE))
+# Extract OrgDb
+if (args$org=="org.Hs.eg.db") {
+orgdb<-org.Hs.eg.db
+} else if (args$org=="org.Mm.eg.db") {
+orgdb<-org.Mm.eg.db
+} else if (args$org=="org.Rn.eg.db") {
+orgdb<-org.Rn.eg.db
+}
+for(ontology in ont) {
+liste = list("l1"=ids1,"l2"=ids2)
+names(liste) = c(args$list_name1,args$list_name2)
+res.cmp<-cmp.GO(l=liste,fun="groupGO",orgdb, ontology, level=lev, readable=TRUE)
+make_dotplot(res.cmp,ontology)
+output_path = paste("GO_profiles_comp_",ontology,".tsv",sep="")
+write.table(res.cmp@compareClusterResult, output_path, sep="\t", row.names=F, quote=F)
+}
+} #end main
+main()

Mercurial > repos > proteore > proteore_go_terms_profiles_comparison

comparison GO_prof_comp.R @ 0:fe80e3b6b5c2 draft default tip