Mercurial > repos > proteore > proteore_kegg_pathways_coverage
comparison compute_kegg_pathways.R @ 0:42d0805353b6 draft
planemo upload commit 0ce4c81e6d2f7af8c9b52f6c07e83b0319c2adb1-dirty
| author | proteore |
|---|---|
| date | Wed, 19 Sep 2018 05:38:52 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:42d0805353b6 |
|---|---|
| 1 library(KEGGREST) | |
| 2 | |
| 3 get_args <- function(){ | |
| 4 | |
| 5 ## Collect arguments | |
| 6 args <- commandArgs(TRUE) | |
| 7 | |
| 8 ## Default setting when no arguments passed | |
| 9 if(length(args) < 1) { | |
| 10 args <- c("--help") | |
| 11 } | |
| 12 | |
| 13 ## Help section | |
| 14 if("--help" %in% args) { | |
| 15 cat("Pathview R script | |
| 16 Arguments: | |
| 17 --help Print this test | |
| 18 --input tab file | |
| 19 --id_list | |
| 20 id list ',' separated | |
| 21 --id_type type of input ids (uniprot_AC or geneID) | |
| 22 --id_column number og column containg ids of interest | |
| 23 --nb_pathways number of pathways to return | |
| 24 --header boolean | |
| 25 --output output path | |
| 26 --ref ref file (l.hsa.gene.RData, l.hsa.up.RData, l.mmu.up.Rdata) | |
| 27 | |
| 28 Example: | |
| 29 Rscript keggrest.R --input='P31946,P62258' --id_type='uniprot' --id_column 'c1' --header TRUE \n\n") | |
| 30 | |
| 31 q(save="no") | |
| 32 } | |
| 33 | |
| 34 parseArgs <- function(x) strsplit(sub("^--", "", x), "=") | |
| 35 argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) | |
| 36 args <- as.list(as.character(argsDF$V2)) | |
| 37 names(args) <- argsDF$V1 | |
| 38 | |
| 39 return(args) | |
| 40 } | |
| 41 | |
| 42 args <- get_args() | |
| 43 | |
| 44 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/compute_KEGG_pathways/args.Rda") | |
| 45 #load("/home/dchristiany/proteore_project/ProteoRE/tools/compute_KEGG_pathways/args.Rda") | |
| 46 | |
| 47 ##function arguments : | |
| 48 ## id.ToMap = input from the user to map on the pathways = list of IDs | |
| 49 ## idType : must be "UNIPROT" or "ENTREZ" | |
| 50 ## org : for the moment can be "Hs" only. Has to evoluate to "Mm" | |
| 51 | |
| 52 str2bool <- function(x){ | |
| 53 if (any(is.element(c("t","true"),tolower(x)))){ | |
| 54 return (TRUE) | |
| 55 }else if (any(is.element(c("f","false"),tolower(x)))){ | |
| 56 return (FALSE) | |
| 57 }else{ | |
| 58 return(NULL) | |
| 59 } | |
| 60 } | |
| 61 | |
| 62 | |
| 63 read_file <- function(path,header){ | |
| 64 file <- try(read.table(path,header=header, sep="\t",stringsAsFactors = FALSE, quote=""),silent=TRUE) | |
| 65 if (inherits(file,"try-error")){ | |
| 66 stop("File not found !") | |
| 67 }else{ | |
| 68 return(file) | |
| 69 } | |
| 70 } | |
| 71 | |
| 72 ID2KEGG.Mapping<- function(id.ToMap,ref) { | |
| 73 | |
| 74 ref_ids = get(load(ref)) | |
| 75 map<-lapply(ref_ids, is.element, unique(id.ToMap)) | |
| 76 names(map) <- sapply(names(map), function(x) gsub("path:","",x),USE.NAMES = FALSE) #remove the prefix "path:" | |
| 77 | |
| 78 in.path<-sapply(map, function(x) length(which(x==TRUE))) | |
| 79 tot.path<-sapply(map, length) | |
| 80 | |
| 81 ratio<-(as.numeric(in.path[which(in.path!=0)])) / (as.numeric(tot.path[which(in.path!=0)])) | |
| 82 ratio <- as.numeric(format(round(ratio*100, 2), nsmall = 2)) | |
| 83 | |
| 84 ##useful but LONG | |
| 85 ## to do before : in step 1 | |
| 86 path.names<-names(in.path[which(in.path!=0)]) | |
| 87 name <- sapply(path.names, function(x) keggGet(x)[[1]]$NAME,USE.NAMES = FALSE) | |
| 88 | |
| 89 res<-data.frame(I(names(in.path[which(in.path!=0)])), I(name), ratio, as.numeric(in.path[which(in.path!=0)]), as.numeric(tot.path[which(in.path!=0)])) | |
| 90 res <- res[order(as.numeric(res[,3]),decreasing = TRUE),] | |
| 91 colnames(res)<-c("pathway_ID", "Description" , "Ratio IDs mapped/total IDs (%)" ,"nb genes mapped in the pathway", "nb total genes present in the pathway") | |
| 92 | |
| 93 return(res) | |
| 94 | |
| 95 } | |
| 96 | |
| 97 ###setting variables | |
| 98 header = str2bool(args$header) | |
| 99 if (!is.null(args$id_list)) {id_list <- strsplit(args$id_list,",")[[1]]} | |
| 100 if (!is.null(args$input)) { | |
| 101 csv <- read_file(args$input,header) | |
| 102 ncol <- as.numeric(gsub("c", "" ,args$id_column)) | |
| 103 id_list <- as.vector(csv[,ncol]) | |
| 104 } | |
| 105 id_type <- toupper(args$id_type) | |
| 106 | |
| 107 #mapping on pathways | |
| 108 res <- ID2KEGG.Mapping(id_list,args$ref) | |
| 109 if (nrow(res) > as.numeric(args$nb_pathways)) { res <- res[1:args$nb_pathways,] } | |
| 110 | |
| 111 write.table(res, file=args$output, quote=FALSE, sep='\t',row.names = FALSE, col.names = TRUE) | |
| 112 |
