Mercurial > repos > proteore > proteore_kegg_pathways_coverage
annotate kegg_identification.R @ 6:f4e32dee3b28 draft default tip
"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
author | proteore |
---|---|
date | Mon, 17 May 2021 12:29:42 +0000 |
parents | d600ce7f2484 |
children |
rev | line source |
---|---|
6
f4e32dee3b28
"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents:
1
diff
changeset
|
1 options(warn = -1) #TURN OFF WARNINGS !!!!!! |
1
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
2 |
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
3 suppressMessages(library(KEGGREST)) |
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
4 |
6
f4e32dee3b28
"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents:
1
diff
changeset
|
# Parse the command line into a named list of option values.
#
# Each option is expected in the form `--name=value`. Only the FIRST "=" is
# treated as the separator, so values may themselves contain "=". An option
# given without "=" (or with an empty value) is stored as the empty string.
#
# Args:
#   args: character vector of raw command-line tokens; defaults to the
#         trailing arguments of the current Rscript invocation.
#
# Returns:
#   A named list of character strings, one entry per option, named by the
#   option without its leading "--".
#
# Side effects:
#   Prints the help text and quits R when no argument or "--help" is given.
get_args <- function(args = commandArgs(TRUE)) {

  ## Default setting when no arguments passed
  if (length(args) < 1) {
    args <- c("--help")
  }

  ## Help section
  if ("--help" %in% args) {
    cat("Pathview R script
    Arguments:
        --help                  Print this text
        --input                 tab file
        --id_list               id list ',' separated
        --id_type               type of input ids (kegg-id, uniprot, ncbi-geneid)
        --id_column             number of column containing ids of interest
        --nb_pathways           number of pathways to return
        --header                boolean
        --output                output path
        --species               species used to get specific pathways(hsa,mmu,rno)

    Example:
        Rscript kegg_identification.R --input='ids.tsv' --id_column='c1'
        --header='TRUE' --id_type='uniprot' --species='hsa'
        --nb_pathways='10' --output='kegg_pathways.tsv' \n\n")

    q(save = "no")
  }

  # Strip the leading "--" and split each token on its first "=" only.
  tokens <- sub("^--", "", args)
  eq_pos <- as.integer(regexpr("=", tokens, fixed = TRUE))
  has_value <- eq_pos > 0
  keys <- ifelse(has_value, substr(tokens, 1, eq_pos - 1), tokens)
  values <- ifelse(has_value, substr(tokens, eq_pos + 1, nchar(tokens)), "")

  parsed <- as.list(values)
  names(parsed) <- keys

  return(parsed)
}
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
43 |
6
f4e32dee3b28
"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents:
1
diff
changeset
|
# Translate a textual flag into a logical value, ignoring case.
#
# Returns TRUE when "t" or "true" is found in `x`, FALSE when "f" or "false"
# is found, and NULL when neither is present.
str2bool <- function(x) {
  flag <- tolower(x)
  if (any(c("t", "true") %in% flag)) {
    return(TRUE)
  }
  if (any(c("f", "false") %in% flag)) {
    return(FALSE)
  }
  NULL
}
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
53 |
6
f4e32dee3b28
"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents:
1
diff
changeset
|
# Read a tab-separated file into a data frame.
#
# Args:
#   path:   location of the tabular file.
#   header: TRUE when the first line holds the column names, FALSE otherwise.
#
# Returns:
#   A data frame with character columns kept as strings and column names
#   left exactly as written in the file.
#
# Errors:
#   "File not found !" when `path` does not exist; otherwise an error that
#   carries the underlying read failure instead of hiding it.
read_file <- function(path, header) {
  if (is.null(header)) {
    stop("'header' must be TRUE or FALSE (got NULL)", call. = FALSE)
  }
  tryCatch(
    read.csv(path, header = header, sep = "\t",
             stringsAsFactors = FALSE, quote = "\"", check.names = FALSE),
    error = function(e) {
      # Only claim the file is missing when that is actually the case.
      if (!file.exists(path)) {
        stop("File not found !")
      }
      stop("Unable to read file '", path, "': ", conditionMessage(e),
           call. = FALSE)
    }
  )
}
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
63 |
6
f4e32dee3b28
"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents:
1
diff
changeset
|
# Build the pathway -> genes lookup for a species.
#
# Args:
#   species: KEGG organism code passed to keggLink().
#
# Returns:
#   A named list: names are the pathway ids as returned by keggLink(),
#   values are character vectors of the gene ids linked to that pathway.
#   Pathways appear in order of first occurrence. The result is always a
#   list, even when every pathway has the same number of genes.
get_pathways_list <- function(species) {
  ## all gene -> pathway links for the species
  ## (names = gene ids, values = pathway ids)
  pathways <- keggLink("pathway", species)
  if (length(pathways) == 0) {
    return(list())
  }

  ## group the gene ids by pathway in a single pass;
  ## the explicit levels keep the first-occurrence order of the pathways
  groups <- factor(pathways, levels = unique(pathways))
  pathways_list <- split(names(pathways), groups)
  return(pathways_list)
}
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
75 |
6
f4e32dee3b28
"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents:
1
diff
changeset
|
# Turn a pasted, whitespace-separated string of identifiers into a clean
# vector.
#
# Args:
#   list: a single string holding ids separated by spaces, tabs or newlines.
#
# Returns:
#   A character vector of unique ids with blanks and non-breaking spaces
#   removed, empty and literal "NA" entries dropped, and any isoform suffix
#   (everything from the first "-") stripped.
get_list_from_cp <- function(list) {
  ids <- strsplit(list, "[ \t\n]+")[[1]]
  ids <- gsub("[[:blank:]]|\u00A0", "", ids)
  # Drop placeholder entries as a whole; "NA" inside a real id is kept.
  ids <- ids[!is.na(ids) & ids != "" & ids != "NA"]
  # Remove isoform accession number (e.g. "-2")
  ids <- unique(gsub("-.+", "", ids))
  return(ids)
}
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
84 |
6
f4e32dee3b28
"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents:
1
diff
changeset
|
# Prefix gene ids with the species code to form "<species>:<id>" ids.
#
# Args:
#   vector:  gene ids (character or numeric).
#   species: species code used as the prefix.
#
# Returns:
#   An unnamed character vector the same length as `vector`; character(0)
#   when `vector` is empty.
geneid_to_kegg <- function(vector, species) {
  # paste() would turn a zero-length input into "<species>:", so handle the
  # empty case explicitly.
  if (length(vector) == 0) {
    return(character(0))
  }
  return(paste(species, vector, sep = ":"))
}
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
90 |
6
f4e32dee3b28
"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents:
1
diff
changeset
|
# Convert a vector of identifiers to unique KEGG gene ids.
#
# Args:
#   id_list: identifiers to convert.
#   id_type: "ncbi-geneid" (ids get the species prefix), "uniprot" (ids are
#            converted through keggConv()) or "kegg-id" (ids are only
#            de-duplicated). Any other value returns `id_list` unchanged.
#   species: species code used for the "ncbi-geneid" prefix. Defaults to the
#            script-level `args$species` and is only evaluated in that branch.
#
# Returns:
#   A vector of unique ids; character(0) when `id_list` is empty.
to_keggid <- function(id_list, id_type, species = args$species) {
  # Nothing to convert: avoid querying KEGG with an empty request.
  if (length(id_list) == 0) {
    return(character(0))
  }

  if (id_type == "ncbi-geneid") {
    id_list <- unique(geneid_to_kegg(id_list, species))
  } else if (id_type == "uniprot") {
    chunk_size <- 250 # ids sent to keggConv() per request
    id_list <- unique(paste0(id_type, ":", id_list))
    chunks <- split(id_list, ceiling(seq_along(id_list) / chunk_size))
    # lapply keeps one result per chunk whatever their sizes are.
    converted <- lapply(chunks, function(chunk) keggConv("genes", chunk))
    id_list <- unique(unlist(converted, use.names = FALSE))
  } else if (id_type == "kegg-id") {
    id_list <- unique(id_list)
  }
  return(id_list)
}
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
109 |
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
110 #take data frame, return data frame |
6
f4e32dee3b28
"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents:
1
diff
changeset
|
# Expand one table row so that the id column holds a single id per row.
#
# Args:
#   line: a one-row data frame.
#   ncol: index of the column holding ";"-separated ids.
#
# Returns:
#   A data frame with the same column names as `line`. When the id cell
#   holds several ";"-separated ids, the row is repeated once per id, with
#   every other column copied. Otherwise `line` is returned. In both cases
#   blanks and non-breaking spaces are removed from the id cell.
split_ids_per_line <- function(line, ncol) {
  header <- colnames(line)

  # Clean the id cell, then split it on ";".
  cell <- gsub("[[:blank:]]|\u00A0", "", as.character(line[[ncol]]))
  line[[ncol]] <- cell
  ids <- unlist(strsplit(cell, ";"), use.names = FALSE)

  if (length(ids) <= 1) {
    return(line)
  }

  # Repeat the row once per id; this handles a first, middle, last or sole
  # id column in the same way.
  lines <- line[rep(1L, length(ids)), , drop = FALSE]
  lines[[ncol]] <- ids
  rownames(lines) <- NULL
  colnames(lines) <- header
  return(lines)
}
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
140 |
6
f4e32dee3b28
"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents:
1
diff
changeset
|
141 #create new lines if there's more than one id per cell in the columns in order |
f4e32dee3b28
"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents:
1
diff
changeset
|
142 #to have only one id per line |
f4e32dee3b28
"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents:
1
diff
changeset
|
# Reshape a table so that the id column holds exactly one id per row.
#
# Args:
#   tab:  data frame read from the input file.
#   ncol: index of the column holding ";"-separated ids.
#
# Returns:
#   A data frame with the same column names as `tab`. With several columns,
#   each row is repeated once per id found in its id cell. With a single
#   column, the ids are split, stripped of blanks, and empty or missing
#   entries are dropped.
one_id_one_line <- function(tab, ncol) {

  if (base::ncol(tab) > 1) {

    tab[[ncol]] <- gsub("[[:blank:]]", "", tab[[ncol]])
    if (nrow(tab) == 0) {
      # Nothing to split; keep the header instead of returning V1, V2, ...
      return(tab)
    }
    # Expand every row, then bind once rather than growing a data frame
    # inside the loop.
    pieces <- lapply(seq_len(nrow(tab)), function(i) {
      split_ids_per_line(tab[i, ], ncol)
    })
    res <- do.call(rbind, pieces)
  } else {
    ids <- gsub("[[:blank:]]", "", as.character(tab[[1]]))
    ids <- as.character(unlist(strsplit(ids, ";"), use.names = FALSE))
    # Keep the entries that are themselves non-empty and non-missing.
    ids <- ids[!is.na(ids) & ids != ""]
    res <- data.frame(ids, stringsAsFactors = FALSE)
    colnames(res) <- colnames(tab)
  }
  return(res)
}
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
161 |
6
f4e32dee3b28
"planemo upload commit 151e7b469b231bbc43c4c39e8e836b05ab6d2253-dirty"
proteore
parents:
1
diff
changeset
|
# Compute, for every pathway hit by at least one query id, how much of the
# pathway is covered.
#
# Args:
#   kegg_id_list: query gene ids, in the same format as the ids in `ref_ids`.
#   id_type:      unused here; kept so existing callers keep working.
#   ref_ids:      named list, pathway id -> character vector of gene ids.
#
# Returns:
#   A data frame sorted by decreasing coverage with the columns
#   pathway_ID (the "path:" prefix removed), Description (first NAME entry
#   returned by keggGet(), NA when absent), the coverage ratio in percent
#   rounded to 2 decimals, the number of mapped ids and the pathway size.
#   Zero rows when no pathway is hit.
kegg_mapping <- function(kegg_id_list, id_type, ref_ids) {

  #mapping: one logical vector per pathway, TRUE where a gene is in the query
  query_ids <- unique(kegg_id_list)
  map <- lapply(ref_ids, is.element, query_ids)
  if (!is.null(names(map))) {
    names(map) <- gsub("path:", "", names(map)) #remove the prefix "path:"
  }

  in_path <- vapply(map, sum, numeric(1))
  tot_path <- lengths(map)

  # Only the pathways with at least one mapped id are reported.
  hit <- in_path != 0
  mapped <- as.numeric(in_path[hit])
  total <- as.numeric(tot_path[hit])
  ratio <- round(mapped / total * 100, 2)

  ##useful but LONG: one KEGG request per reported pathway
  path_names <- as.character(names(map)[hit])
  name <- vapply(path_names, function(x) {
    entry_name <- keggGet(x)[[1]]$NAME
    if (length(entry_name) == 0) {
      NA_character_
    } else {
      as.character(entry_name[[1]])
    }
  }, character(1), USE.NAMES = FALSE)

  res <- data.frame(path_names, name, ratio, mapped, total,
                    stringsAsFactors = FALSE)
  res <- res[order(res[[3]], decreasing = TRUE), ]
  colnames(res) <- c("pathway_ID", "Description",
                     "Ratio IDs mapped / total IDs (%)",
                     "nb KEGG genes IDs mapped in the pathway",
                     "nb total of KEGG genes IDs present in the pathway")

  return(res)

}
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
194 |
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
195 #get args from command line |
d600ce7f2484
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b-dirty
proteore
parents:
diff
changeset
|
args <- get_args()

###setting variables
header <- str2bool(args$header)
id_list <- NULL
if (!is.null(args$id_list)) { #get ids from copy/paste input
  id_list <- get_list_from_cp(args$id_list)
}
if (!is.null(args$input)) { #get ids from input file (overrides --id_list)
  csv <- read_file(args$input, header)
  id_col <- as.numeric(gsub("c", "", args$id_column))
  if (length(id_col) != 1 || is.na(id_col)) {
    stop("--id_column must be a column reference such as 'c1'", call. = FALSE)
  }
  csv <- one_id_one_line(csv, id_col)
  id_list <- as.vector(csv[, id_col])
  # keep the entries that are themselves non-missing and non-empty
  id_list <- unique(id_list[!is.na(id_list) & id_list != ""])
}
if (is.null(id_list)) {
  stop("No identifiers supplied: use --id_list or --input", call. = FALSE)
}

#convert to keggID if needed
id_list <- to_keggid(id_list, args$id_type)

#get pathways of species with associated KEGG ID genes
pathways_list <- get_pathways_list(args$species)

#mapping on pathways
res <- kegg_mapping(id_list, args$id_type, pathways_list)

#keep only the requested number of best-covered pathways (all when absent)
nb_pathways <- as.numeric(args$nb_pathways)
if (length(nb_pathways) == 1 && !is.na(nb_pathways) &&
      nb_pathways >= 0 && nrow(res) > nb_pathways) {
  res <- res[seq_len(nb_pathways), , drop = FALSE]
}

write.table(res, file = args$output, quote = FALSE, sep = "\t",
            row.names = FALSE, col.names = TRUE)