Mercurial > repos > proteore > proteore_expression_rnaseq_abbased
annotate add_expression_HPA.R @ 7:c9943f867413 draft
planemo upload commit 6b011549498a096af60a494bc39c1a2078580a5a-dirty
author | proteore |
---|---|
date | Tue, 20 Mar 2018 10:46:46 -0400 |
parents | 71214d6034e7 |
children | 5c260bd3552e |
rev | line source |
---|---|
6
71214d6034e7
planemo upload commit 4af7ac25de19ca10b1654820e909c647a2d337b2-dirty
proteore
parents:
diff
changeset
|
# Read a tab-separated file and return its content as a data.frame.
#
# Args:
#   filename: path of the tab-delimited file to read.
#   header:   the string "true" when the first line of the file holds the
#             column names; any other value means there is no header line
#             and the default names assigned by read.table are kept.
#
# Returns:
#   A data.frame holding the file content, without the rows whose cells are
#   all NA or empty strings. Empty cells and the literal "NA" are read as NA.
readfile <- function(filename, header) {
  # Single place for the parsing options shared by every read of the file.
  read_tsv <- function(...) {
    read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE,
               fill = TRUE, na.strings = c("", "NA"), blank.lines.skip = TRUE,
               quote = "", comment.char = "", ...)
  }

  # Drop the rows in which every cell is NA or an empty string.
  drop_empty_rows <- function(df) {
    is_blank <- is.na(df) | df == ""
    df[!apply(is_blank, 1, all), , drop = FALSE]
  }

  if (header == "true") {
    # Read only the first line, forcing every cell to character: with the
    # default type conversion a header such as "T" would become "TRUE" and
    # "007" would become "7".
    header_row <- read_tsv(nrows = 1, colClasses = "character")
    # Read the data of the file (skipping the first row).
    content <- drop_empty_rows(read_tsv(skip = 1))
    col_names <- as.character(unlist(header_row[1, ], use.names = FALSE))
    # An empty header cell is read as NA; name such a column "NA".
    col_names[is.na(col_names)] <- "NA"
    names(content) <- col_names
  } else {
    content <- drop_empty_rows(read_tsv())
  }
  content
}
71214d6034e7
planemo upload commit 4af7ac25de19ca10b1654820e909c647a2d337b2-dirty
proteore
parents:
diff
changeset
|
20 |
71214d6034e7
planemo upload commit 4af7ac25de19ca10b1654820e909c647a2d337b2-dirty
proteore
parents:
diff
changeset
|
# Look up the selected atlas columns for a vector of Ensembl identifiers.
#
# Args:
#   input:   vector of Ensembl gene identifiers; the result has one row per
#            element, in the same order.
#   atlas:   data.frame with an `Ensembl` column and the columns named in
#            `options`.
#   options: character vector of atlas column names to extract.
#
# Returns:
#   NULL when no identifier of `input` is found in `atlas$Ensembl`;
#   otherwise a matrix with length(input) rows and one column per option
#   (named after it). Rows of identifiers that are not found are filled
#   with NA.
add_expression <- function(input, atlas, options) {
  # Missing identifiers (a real NA as well as the literal string "NA") must
  # never be paired with an atlas row, even if the atlas itself contains a
  # row whose Ensembl value is missing.
  row_index <- match(input, atlas$Ensembl, incomparables = c(NA, "NA"))
  if (all(is.na(row_index))) {
    return(NULL)
  }

  # Start from a zero-column matrix so that the result keeps one row per
  # input identifier even when `options` is empty.
  res <- matrix(nrow = length(input), ncol = 0)
  for (opt in options) {
    res <- cbind(res, atlas[row_index, opt])
  }
  colnames(res) <- if (length(options) > 0) options else NULL
  res
}
71214d6034e7
planemo upload commit 4af7ac25de19ca10b1654820e909c647a2d337b2-dirty
proteore
parents:
diff
changeset
|
37 |
71214d6034e7
planemo upload commit 4af7ac25de19ca10b1654820e909c647a2d337b2-dirty
proteore
parents:
diff
changeset
|
# Command-line entry point: annotate Ensembl gene identifiers with columns
# selected from a protein atlas tab-separated file and write the result.
#
# Arguments are taken from commandArgs(TRUE) as --key=value pairs (see the
# help text below). With no argument, or with --help, the help text is
# printed and the R session quits.
#
# Output (written to --output):
#   * inputtype "copypaste": an "Ensembl" column followed by the selected
#     atlas columns;
#   * inputtype "tabfile": the input table followed by the selected atlas
#     columns;
#   * when no identifier is found in the atlas: a one-cell table holding an
#     explanatory message.
main <- function() {
  cli_args <- commandArgs(TRUE)
  if (length(cli_args) < 1) {
    cli_args <- c("--help")
  }

  # Help section
  if ("--help" %in% cli_args) {
    cat(c(
      "Selection and Annotation HPA",
      "Arguments:",
      "--inputtype: type of input (list of id or filename)",
      "--input: either a file name (e.g : input.txt) or a list of blank-separated",
      "ENSG identifiers (e.g : ENSG00000283071 ENSG00000283072)",
      "--atlas: path to protein atlas file",
      "--column: the column number which you would like to apply...",
      "--header: true/false if your file contains a header",
      "--select: information from HPA to select, maybe:",
      "RNA.tissue.category,Reliability..IH.,Reliability..IF. (comma-separated)",
      "--output: text output filename \n"
    ), sep = "\n")
    q(save = "no")
  }

  # Parse arguments. Each one is split at its FIRST "=" only, so that values
  # which themselves contain "=" (e.g. some file paths) are kept whole.
  stripped <- sub("^--", "", cli_args)
  eq_pos <- as.integer(regexpr("=", stripped, fixed = TRUE))
  has_value <- eq_pos > 0
  keys <- ifelse(has_value, substr(stripped, 1, eq_pos - 1), stripped)
  values <- ifelse(has_value,
                   substr(stripped, eq_pos + 1, nchar(stripped)),
                   NA_character_)
  args <- as.list(values)
  names(args) <- keys

  # Stop with an explicit message when a needed argument has no value.
  require_args <- function(needed) {
    absent <- needed[vapply(
      needed,
      function(key) is.null(args[[key]]) || is.na(args[[key]]),
      logical(1)
    )]
    if (length(absent) > 0) {
      stop("Missing argument(s): ", paste0("--", absent, collapse = ", "),
           call. = FALSE)
    }
  }
  require_args(c("inputtype", "input", "atlas", "select", "output"))

  inputtype <- args[["inputtype"]]
  if (inputtype == "copypaste") {
    # Trim first: leading whitespace would otherwise yield an empty
    # identifier as first element of the split.
    ids <- strsplit(trimws(args[["input"]]), "[ \t\n]+")[[1]]
  } else if (inputtype == "tabfile") {
    require_args(c("column", "header"))
    # Check the column number; both "3" and "c3" are accepted.
    col_index <- suppressWarnings(
      as.numeric(gsub("c", "", args[["column"]]))
    )
    if (is.na(col_index) || col_index %% 1 != 0 || col_index < 1) {
      stop("Please enter an integer for the column number", call. = FALSE)
    }
    # Get file content
    table_in <- readfile(args[["input"]], args[["header"]])
    if (col_index > ncol(table_in)) {
      stop("Column ", col_index, " does not exist: the input file has ",
           ncol(table_in), " column(s)", call. = FALSE)
    }
    # Extract the identifier list: only the first of several
    # ";"-separated identifiers in a cell is kept.
    ids <- vapply(
      strsplit(as.character(table_in[, col_index]), ";"),
      function(parts) parts[1],
      character(1)
    )
  } else {
    stop("Unknown inputtype '", inputtype,
         "': expected 'copypaste' or 'tabfile'", call. = FALSE)
  }

  # Read protein atlas (always parsed with a header line)
  protein_atlas <- readfile(args[["atlas"]], "true")

  # Add expression
  output <- args[["output"]]
  selected <- strsplit(args[["select"]], ",")[[1]]
  res <- add_expression(ids, protein_atlas, selected)

  # Write output
  if (is.null(res)) {
    # NOTE(review): the message wording is kept exactly as it was so that
    # the content of the output file does not change.
    write.table("None of the input ENSG ids are can be found in HPA data file",
                file = output, sep = "\t", quote = FALSE,
                col.names = TRUE, row.names = FALSE)
  } else if (inputtype == "copypaste") {
    out_names <- c("Ensembl", colnames(res))
    annotated <- cbind(as.matrix(ids), res)
    colnames(annotated) <- out_names
    write.table(annotated, output, row.names = FALSE, sep = "\t",
                quote = FALSE)
  } else {
    out_names <- c(names(table_in), colnames(res))
    annotated <- cbind(table_in, res)
    colnames(annotated) <- out_names
    write.table(annotated, output, row.names = FALSE, sep = "\t",
                quote = FALSE)
  }
}
71214d6034e7
planemo upload commit 4af7ac25de19ca10b1654820e909c647a2d337b2-dirty
proteore
parents:
diff
changeset
|
120 |
71214d6034e7
planemo upload commit 4af7ac25de19ca10b1654820e909c647a2d337b2-dirty
proteore
parents:
diff
changeset
|
# Run the command-line entry point when the script is executed.
main()