annotate kegg_identification.R @ 6:f4e32dee3b28 draft default tip

"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
author proteore
date Mon, 17 May 2021 12:29:42 +0000
parents d600ce7f2484
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
f4e32dee3b28 "planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents: 1
diff changeset
1 options(warn = -1) # Turn off all warnings for the whole script run
1
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
2
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
3 suppressMessages(library(KEGGREST))
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
4
6
f4e32dee3b28 "planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents: 1
diff changeset
# Parse the command-line arguments of the script.
#
# Arguments are expected in the form --name=value. When no argument is
# given, or when --help is present, the usage text is printed and the R
# session is terminated.
#
# Returns a named list of character strings, one element per argument,
# named after the argument without its leading "--". An argument given
# without "=" gets an empty string as value.
get_args <- function() {

  ## Collect arguments
  args <- commandArgs(TRUE)

  ## Default setting when no arguments passed
  if (length(args) < 1) {
    args <- c("--help")
  }

  ## Help section
  if ("--help" %in% args) {
    cat("Pathview R script
    Arguments:
      --help                  Print this text
      --input                 tab file
      --id_list               id list separated by spaces, tabs or newlines
      --id_type               type of input ids (kegg-id, uniprot, ncbi-geneid)
      --id_column             number of column containing ids of interest
      --nb_pathways           number of pathways to return
      --header                boolean
      --output                output path
      --species               species used to get specific pathways(hsa,mmu,rno)

    Example:
      Rscript kegg_identification.R --id_list='P31946 P62258'
      --id_type='uniprot' --species='hsa' --nb_pathways=10
      --output='out.tsv' \n\n")

    q(save = "no")
  }

  # Split every argument on its FIRST "=" only, so that values which
  # themselves contain "=" (paths, pasted text) are kept whole.
  eq_pos <- regexpr("=", args, fixed = TRUE)
  has_value <- eq_pos > 0
  keys <- ifelse(has_value, substr(args, 1, eq_pos - 1), args)
  values <- ifelse(has_value, substr(args, eq_pos + 1, nchar(args)), "")

  parsed <- as.list(values)
  names(parsed) <- sub("^--", "", keys)

  return(parsed)
}
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
43
6
f4e32dee3b28 "planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents: 1
diff changeset
# Translate a textual flag into a logical value.
#
# x: character value(s); compared case-insensitively.
#
# Returns TRUE when x contains "t" or "true", FALSE when it contains
# "f" or "false", and NULL when neither is recognised.
str2bool <- function(x) {
  lowered <- tolower(x)
  if (any(c("t", "true") %in% lowered)) {
    return(TRUE)
  }
  if (any(c("f", "false") %in% lowered)) {
    return(FALSE)
  }
  NULL
}
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
53
6
f4e32dee3b28 "planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents: 1
diff changeset
# Read a tab-separated file into a data frame.
#
# path:   path of the file to read.
# header: logical, TRUE when the first line holds the column names.
#
# Strings are kept as character and column names are left untouched.
# Stops with "File not found !" when the path does not exist; any other
# read failure is reported with the underlying error message instead of
# being disguised as a missing file.
read_file <- function(path, header) {
  tryCatch(
    read.csv(path, header = header, sep = "\t",
             stringsAsFactors = FALSE, quote = "\"", check.names = FALSE),
    error = function(e) {
      if (!file.exists(path)) {
        stop("File not found !", call. = FALSE)
      }
      stop("Unable to read file '", path, "': ", conditionMessage(e),
           call. = FALSE)
    }
  )
}
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
63
6
f4e32dee3b28 "planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents: 1
diff changeset
# Build the pathway -> genes table of a species.
#
# species: organism code handed to keggLink() (e.g. "hsa").
#
# Returns a list with one element per pathway, in order of first
# appearance in the keggLink() result: the element name is the pathway ID
# and its value the character vector of gene IDs linked to that pathway.
get_pathways_list <- function(species) {
  ## all available pathways for the species
  ## (the code relies on names being gene IDs and values pathway IDs)
  pathways <- keggLink("pathway", species)

  ## formatting the data into a list object
  ## key = pathway ID, value = genes of the pathway in the kegg format
  # split() always yields a list (sapply() would simplify to a vector or a
  # matrix when every pathway holds the same number of genes) and groups
  # in a single pass; the explicit levels keep the first-seen order.
  pathways_list <- split(as.character(names(pathways)),
                         factor(pathways, levels = unique(pathways)))
  return(pathways_list)
}
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
75
6
f4e32dee3b28 "planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents: 1
diff changeset
# Turn a copy/pasted block of identifiers into a clean vector of IDs.
#
# list: character string holding IDs separated by spaces, tabs or
#       newlines.
#
# Returns the unique IDs with blanks and non-breaking spaces removed,
# the isoform suffix stripped, and empty / NA / "NA" entries discarded.
get_list_from_cp <- function(list) {
  ids <- unlist(strsplit(list, "[ \t\n]+"), use.names = FALSE)
  ids <- gsub("[[:blank:]]|\u00A0", "", ids)
  #Remove isoform accession number (e.g. "-2")
  ids <- gsub("-.+", "", ids)
  # Drop whole entries only: removing the substring "NA" would corrupt IDs
  # such as "Q9NA12", and the mask must index the vector it was built from.
  keep <- !is.na(ids) & ids != "" & ids != "NA"
  ids <- unique(as.character(ids[keep]))
  return(ids)
}
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
84
6
f4e32dee3b28 "planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents: 1
diff changeset
# Prefix gene IDs with a species code to form KEGG-style gene IDs.
#
# vector:  gene IDs.
# species: prefix to put before each ID (e.g. "hsa").
#
# Returns an unnamed character vector "<species>:<id>", one per input ID
# (character(0) for empty input).
geneid_to_kegg <- function(vector, species) {
  # paste() would turn an empty input into the single string "<species>:"
  if (length(vector) == 0) {
    return(character(0))
  }
  paste(species, vector, sep = ":")
}
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
90
6
f4e32dee3b28 "planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents: 1
diff changeset
# Convert a vector of identifiers to unique KEGG gene IDs.
#
# id_list: identifiers to convert.
# id_type: "ncbi-geneid", "uniprot" or "kegg-id"; any other value leaves
#          id_list unchanged.
# species: species prefix used for "ncbi-geneid" input. Defaults to the
#          script-level args$species, which the function previously read
#          directly; it is only evaluated for "ncbi-geneid".
#
# Returns the unique converted IDs (character(0) for empty input).
to_keggid <- function(id_list, id_type, species = args$species) {
  if (length(id_list) == 0) {
    return(character(0))
  }

  if (id_type == "ncbi-geneid") {
    id_list <- unique(geneid_to_kegg(id_list, species))
  } else if (id_type == "uniprot") {
    id_list <- unique(paste0(id_type, ":", id_list))
    # Query keggConv() with at most 250 IDs per call; a short list simply
    # forms a single chunk.
    chunks <- split(id_list, ceiling(seq_along(id_list) / 250))
    converted <- lapply(chunks, function(chunk) keggConv("genes", chunk))
    id_list <- unique(as.character(unlist(converted, use.names = FALSE)))
  } else if (id_type == "kegg-id") {
    id_list <- unique(id_list)
  }
  return(id_list)
}
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
109
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
# Take a data frame, return a data frame.
#
# Expand one row so that its ID cell holds a single identifier.
#
# line: one-row data frame.
# ncol: index of the column holding the ";"-separated IDs.
#
# Blanks and non-breaking spaces are removed from the ID cell. When the
# cell holds several IDs, the row is repeated once per ID (other columns
# copied as they are); otherwise the cleaned row is returned. The ID
# column comes back as character and column names are preserved.
split_ids_per_line <- function(line, ncol) {
  cell <- gsub("[[:blank:]]|\u00A0", "", as.character(line[[ncol]]))
  line[[ncol]] <- cell

  ids <- unlist(strsplit(cell, ";"), use.names = FALSE)
  if (length(ids) <= 1) {
    return(line)
  }

  # Repeat the row once per ID, then overwrite the ID column. This replaces
  # the separate first / middle / last column cbind() cases.
  lines <- line[rep(1L, length(ids)), , drop = FALSE]
  lines[[ncol]] <- ids
  rownames(lines) <- NULL
  return(lines)
}
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
140
6
f4e32dee3b28 "planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents: 1
diff changeset
# Create new lines if there is more than one id per cell in the id column,
# in order to have only one id per line.
#
# tab:  data frame read from the input file.
# ncol: index of the column holding the IDs.
#
# Returns a data frame with the same columns as tab. With several columns,
# each row is expanded by split_ids_per_line(); with a single column, the
# cells are split on ";" and empty or NA entries are dropped.
one_id_one_line <- function(tab, ncol) {

  if (ncol(tab) > 1) {

    tab[, ncol] <- gsub("[[:blank:]]", "", tab[, ncol])
    if (nrow(tab) == 0) {
      return(tab)
    }
    # Expand every row first and bind once, instead of growing the result
    # with rbind() inside a loop.
    pieces <- lapply(seq_len(nrow(tab)), function(i) {
      split_ids_per_line(tab[i, , drop = FALSE], ncol)
    })
    res <- do.call(rbind, pieces)
    rownames(res) <- NULL

  } else {

    ids <- gsub("[[:blank:]]", "", as.character(tab[, 1]))
    ids <- as.character(unlist(strsplit(ids, ";"), use.names = FALSE))
    # The mask must be built on the vector it subsets, otherwise empty
    # entries are kept and trailing ids are lost.
    ids <- ids[!is.na(ids) & ids != ""]
    res <- data.frame(ids, stringsAsFactors = FALSE)
    colnames(res) <- colnames(tab)

  }
  return(res)
}
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
161
6
f4e32dee3b28 "planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents: 1
diff changeset
# Map KEGG gene IDs on reference pathways and summarise the coverage.
#
# kegg_id_list: KEGG gene IDs to map.
# id_type:      unused; kept so that existing calls stay valid.
# ref_ids:      list named by pathway ID whose elements are the gene IDs
#               of each pathway.
#
# Returns a data frame with one row per pathway holding at least one
# mapped ID, sorted by decreasing ratio, with the columns: pathway ID
# (prefix "path:" removed), description, percentage of pathway genes
# mapped (2 decimals), number of mapped genes and total number of genes
# in the pathway. Zero rows when nothing maps.
kegg_mapping <- function(kegg_id_list, id_type, ref_ids) {

  #mapping: for each pathway, flag the genes present in the input list
  wanted <- unique(kegg_id_list)
  map <- lapply(ref_ids, is.element, wanted)
  if (!is.null(names(map))) {
    names(map) <- gsub("path:", "", names(map)) #remove the prefix "path:"
  }

  in_path <- vapply(map, sum, integer(1))
  tot_path <- vapply(map, length, integer(1))

  # Only pathways with at least one mapped gene are reported
  hit <- in_path != 0
  path_names <- as.character(names(in_path)[hit])
  mapped <- as.numeric(in_path[hit])
  total <- as.numeric(tot_path[hit])
  ratio <- round(mapped / total * 100, 2)

  ##useful but LONG: one keggGet() call per reported pathway
  ## to do before : in step 1
  # vapply() keeps a character vector even when no pathway is hit; a
  # missing NAME gives NA instead of breaking the column.
  name <- vapply(path_names, function(x) {
    as.character(keggGet(x)[[1]]$NAME)[1]
  }, character(1), USE.NAMES = FALSE)

  res <- data.frame(I(path_names), I(name), ratio, mapped, total)
  res <- res[order(as.numeric(res[, 3]), decreasing = TRUE), ]
  colnames(res) <- c("pathway_ID", "Description",
                     "Ratio IDs mapped / total IDs (%)",
                     "nb KEGG genes IDs mapped in the pathway",
                     "nb total of KEGG genes IDs present in the pathway")

  return(res)

}
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
194
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
195 #get args from command line
d600ce7f2484 planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff changeset
# Read the command line (prints the usage text and quits on --help)
args <- get_args()

###setting variables
# header is only used when reading --input; str2bool() yields NULL when
# the option is absent or not recognised
header <- str2bool(args$header)

if (is.null(args$id_list) && is.null(args$input)) {
  stop("No identifiers supplied: use --id_list or --input", call. = FALSE)
}

if (!is.null(args$id_list)) { #get ids from copy/paste input
  id_list <- get_list_from_cp(args$id_list)
}
if (!is.null(args$input)) { #get ids from input file
  csv <- read_file(args$input, header)
  # id_column is given as "c<number>", e.g. "c1"
  ncol <- as.numeric(gsub("c", "", args$id_column))
  csv <- one_id_one_line(csv, ncol)
  id_list <- as.vector(csv[, ncol])
  # Build the mask on the vector it subsets so that no valid id is lost
  id_list <- unique(id_list[!is.na(id_list) & id_list != ""])
}

#convert to keggID if needed
id_list <- to_keggid(id_list, args$id_type)

#get pathways of species with associated KEGG ID genes
pathways_list <- get_pathways_list(args$species)

#mapping on pathways
res <- kegg_mapping(id_list, args$id_type, pathways_list)

# Keep only the first nb_pathways rows (res is already sorted by ratio).
# seq_len() copes with 0, where 1:0 would wrongly select the first row.
nb_pathways <- as.numeric(args$nb_pathways)
if (length(nb_pathways) == 1 && !is.na(nb_pathways) &&
    nrow(res) > nb_pathways) {
  res <- res[seq_len(max(floor(nb_pathways), 0)), ]
}

write.table(res, file = args$output, quote = FALSE, sep = "\t",
            row.names = FALSE, col.names = TRUE)