Mercurial > repos > proteore > proteore_expression_levels_by_tissue
annotate sel_ann_hpa.R @ 3:d7f909ae24d9 draft
planemo upload commit f338a1cf9a782938c228f9bd0ea19ef22eec35d8-dirty
author | proteore |
---|---|
date | Wed, 07 Mar 2018 09:56:25 -0500 |
parents | 5501e74891e4 |
children | 69cf9e6283f8 |
rev | line source |
---|---|
0
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
1 |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
2 # Read file and return file content as data.frame |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
# Read a tab-separated file and return its content as a data.frame.
#
# Arguments:
#   filename: path of the tab-separated file to read.
#   header:   the string "true" when the first line holds column names;
#             any other value means the file has no header line.
#
# Returns a data.frame. With a header, the columns are named after the
# first line; otherwise they keep read.table's default names (V1, V2, ...).
#
# Quote and comment processing are switched off on purpose: the input is
# plain TSV, so a field such as "5'-nucleotidase" or a header such as "#ID"
# must be read verbatim instead of being treated as an opening quote or as
# a comment (which would silently drop or merge lines).
readfile <- function(filename, header) {
  read_tsv <- function(...) {
    read.table(filename, header = FALSE, sep = "\t", quote = "",
               comment.char = "", stringsAsFactors = FALSE, fill = TRUE, ...)
  }

  if (header == "true") {
    # Read only the first line, as text, so labels are not type-converted
    # (e.g. a column called "001" must not become 1).
    headers <- read_tsv(nrows = 1, colClasses = "character")
    # Read the data separately (skipping the header line) so that column
    # types are inferred from the data rows alone.
    file <- read_tsv(skip = 1)
    names(file) <- unlist(headers, use.names = FALSE)
  } else {
    file <- read_tsv()
  }
  file
}
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
19 |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
20 |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
21 # input has to be a list of IDs in ENSG format |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
22 # tissue is one of unique(HPA.normal.tissue$Tissue) |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
23 # level is one, or several, or 0 (=ALL) of "Not detected", "Medium", "High", "Low" |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
24 # reliability is one, or several, or 0 (=ALL) of "Approved", "Supported", "Uncertain" |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
# Select the HPA normal-tissue annotations of a list of gene IDs.
#
# Arguments:
#   input:             character vector of gene IDs, matched against the
#                      `Gene` column of the reference table.
#   HPA_normal_tissue: reference data.frame; must contain the columns
#                      `Gene`, `Tissue`, `Level` and `Reliability`, with the
#                      gene ID as its first column (filler rows put the ID
#                      in column 1).
#   tissue, level, reliability: character vectors of accepted values for the
#                      matching column. An empty vector means "no filter on
#                      this column" (ALL).
#   not_mapped_option: the string "true" to append filler rows for IDs that
#                      produced no result; anything else (including NULL)
#                      returns only the selected rows.
#
# Returns the selected rows of the reference table. With
# not_mapped_option == "true" it additionally appends
#   * one row per ID found in the reference but removed by the filters,
#     with "not match" in every non-ID column, then
#   * one row per ID absent from the reference, with NA in every non-ID
#     column.
annot.HPAnorm <- function(input, HPA_normal_tissue, tissue, level, reliability, not_mapped_option) {
  # TRUE for every element when no value was requested, otherwise membership.
  # `%in%` also covers the single-value case and never yields NA.
  accepts <- function(values, wanted) {
    if (length(wanted) == 0) {
      rep(TRUE, length(values))
    } else {
      values %in% wanted
    }
  }

  # Build a character matrix of filler rows: ID in the first column and
  # `fill` everywhere else, named like the reference table.
  filler_rows <- function(ids, fill) {
    ref_cols <- colnames(HPA_normal_tissue)
    block <- cbind(
      matrix(ids, ncol = 1, nrow = length(ids)),
      matrix(fill, ncol = length(ref_cols) - 1, nrow = length(ids))
    )
    colnames(block) <- ref_cols
    block
  }

  dat <- HPA_normal_tissue[HPA_normal_tissue$Gene %in% input, , drop = FALSE]
  selected <- accepts(dat$Tissue, tissue) &
    accepts(dat$Level, level) &
    accepts(dat$Reliability, reliability)
  res <- dat[selected, , drop = FALSE]

  # identical() keeps a missing (NULL) option from aborting the `if`.
  if (identical(not_mapped_option, "true")) {
    known_genes <- unique(dat$Gene)

    # IDs present in the reference but with no row left after filtering.
    not_match_ids <- setdiff(intersect(input, known_genes), unique(res$Gene))
    if (length(not_match_ids) > 0) {
      res <- rbind(res, filler_rows(not_match_ids, "not match"))
    }

    # IDs that do not occur in the reference at all.
    not_mapped_ids <- setdiff(input, known_genes)
    if (length(not_mapped_ids) > 0) {
      res <- rbind(res, filler_rows(not_mapped_ids, NA))
    }
  }

  res
}
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
76 |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
# Select the HPA cancer (pathology) annotations of a list of gene IDs.
#
# Arguments:
#   input:             character vector of gene IDs, matched against the
#                      `Gene` column of the reference table.
#   HPA_cancer_tissue: reference data.frame; must contain the columns `Gene`
#                      and `Cancer`, with the gene ID as its first column
#                      (filler rows put the ID in column 1).
#   cancer:            character vector of accepted `Cancer` values. An empty
#                      vector means "no filter" (all cancers).
#   not_mapped_option: the string "true" to append one row (ID followed by
#                      NA in every other column) per ID absent from the
#                      reference; anything else (including NULL) returns only
#                      the selected rows.
#
# Returns the selected rows, optionally followed by the not-mapped rows.
annot.HPAcancer <- function(input, HPA_cancer_tissue, cancer, not_mapped_option) {
  dat <- HPA_cancer_tissue[HPA_cancer_tissue$Gene %in% input, , drop = FALSE]

  # `%in%` handles one or several requested cancers alike; with no requested
  # value every row of `dat` is kept instead of failing on an undefined
  # intermediate result.
  if (length(cancer) > 0) {
    res <- dat[dat$Cancer %in% cancer, , drop = FALSE]
  } else {
    res <- dat
  }

  # identical() keeps a missing (NULL) option from aborting the `if`.
  if (identical(not_mapped_option, "true")) {
    not_mapped_ids <- setdiff(input, unique(dat$Gene))
    if (length(not_mapped_ids) > 0) {
      ref_cols <- colnames(HPA_cancer_tissue)
      not_mapped <- cbind(
        matrix(not_mapped_ids, ncol = 1, nrow = length(not_mapped_ids)),
        matrix(NA, ncol = length(ref_cols) - 1, nrow = length(not_mapped_ids))
      )
      colnames(not_mapped) <- ref_cols
      res <- rbind(res, not_mapped)
    }
  }

  res
}
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
98 |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
99 |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
# Command-line entry point.
#
# Parses `--key=value` arguments, builds the list of gene IDs (given inline
# or taken from one column of a tab-separated file), reads the HPA reference
# table, runs the normal-tissue or cancer selection and writes the result as
# a tab-separated file. Prints the usage text and quits when called without
# arguments or with --help. Stops with an explicit message when a required
# argument is missing or has an unsupported value.
main <- function() {
  args <- commandArgs(TRUE)
  if (length(args) < 1) {
    args <- c("--help")
  }

  # Help section
  if ("--help" %in% args) {
    cat("Selection and Annotation HPA
    Arguments:
      --ref_file: HPA normal/cancer tissue file path
      --input_type: type of input (list of id or filename)
      --input: list of IDs in ENSG format
      --column_number: the column number which you would like to apply...
      --header: true/false if your file contains a header
      --atlas: normal/cancer
        if normal:
          --tissue: list of tissues
          --level: Not detected, Low, Medium, High
          --reliability: Approved, Supported, Uncertain
        if cancer:
          --cancer: Cancer tissues
      --not_mapped: true/false if your output file should contain not-mapped and not-match IDs
      --output: output filename \n")
    q(save = "no")
  }

  # Parse arguments. Each argument is split at its FIRST "=" only, so a value
  # that itself contains "=" is kept whole; an argument without a value maps
  # to the empty string.
  parse_args <- function(raw) {
    stripped <- sub("^--", "", raw)
    eq_pos <- as.integer(regexpr("=", stripped, fixed = TRUE))
    has_value <- eq_pos > 0
    keys <- ifelse(has_value, substr(stripped, 1, eq_pos - 1), stripped)
    values <- ifelse(has_value,
                     substr(stripped, eq_pos + 1, nchar(stripped)), "")
    stats::setNames(as.list(values), keys)
  }
  args <- parse_args(args)

  # Fetch a mandatory argument by its exact name. `[[` is used instead of `$`
  # because `$` partially matches list names (e.g. `input` -> `input_type`).
  require_arg <- function(name) {
    value <- args[[name]]
    if (is.null(value)) {
      stop("Missing required argument --", name, call. = FALSE)
    }
    value
  }

  # Extract input
  input_type <- require_arg("input_type")
  if (input_type == "list") {
    list_id <- strsplit(require_arg("input"), " ")[[1]]
  } else if (input_type == "file") {
    filename <- require_arg("input")
    column_number <- as.numeric(gsub("c", "", require_arg("column_number")))
    header <- require_arg("header")
    file <- readfile(filename, header)
    if (is.na(column_number) || column_number < 1 ||
        column_number > ncol(file)) {
      stop("Invalid --column_number for an input file with ", ncol(file),
           " column(s)", call. = FALSE)
    }
    # Keep the first ID of each cell when several are separated by ";".
    # as.character() lets strsplit() accept a column read as numeric.
    cells <- strsplit(as.character(file[, column_number]), ";")
    list_id <- vapply(cells, function(parts) parts[1], character(1))
  } else {
    stop("Unsupported --input_type '", input_type,
         "' (expected 'list' or 'file')", call. = FALSE)
  }
  # Blank entries (repeated separators, empty cells) are not gene IDs.
  input <- list_id[!is.na(list_id) & nzchar(list_id)]

  # Read reference file
  reference_file <- read.table(require_arg("ref_file"), header = TRUE,
                               sep = "\t", stringsAsFactors = FALSE,
                               fill = TRUE)

  # Extract other options; --not_mapped defaults to "false" when absent.
  atlas <- require_arg("atlas")
  not_mapped_option <- args[["not_mapped"]]
  if (is.null(not_mapped_option)) {
    not_mapped_option <- "false"
  }

  if (atlas == "normal") {
    tissue <- strsplit(require_arg("tissue"), ",")[[1]]
    level <- strsplit(require_arg("level"), ",")[[1]]
    reliability <- strsplit(require_arg("reliability"), ",")[[1]]
    # Calculation
    res <- annot.HPAnorm(input, reference_file, tissue, level, reliability,
                         not_mapped_option)
  } else if (atlas == "cancer") {
    cancer <- strsplit(require_arg("cancer"), ",")[[1]]
    # Calculation
    res <- annot.HPAcancer(input, reference_file, cancer, not_mapped_option)
  } else {
    stop("Unsupported --atlas '", atlas,
         "' (expected 'normal' or 'cancer')", call. = FALSE)
  }

  # Write output
  output <- require_arg("output")
  write.table(res, output, sep = "\t", quote = FALSE, row.names = FALSE)
}
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
173 |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
174 main() |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
175 |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
176 # Example commands |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
177 # Rscript sel_ann_hpa.R --input_type="file" --input="./test-data/ENSGid.txt" --ref_file="./pathology.tsv" --atlas="cancer" --cancer="lung cancer,carcinoid" --not_mapped="true" --column_number="c1" --header="true" --output="test-data/ENSG_tissue_output_cancer.txt" |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
178 # Rscript sel_ann_hpa.R --input_type="file" --input="./test-data/ENSGid.txt" --ref_file="./normal_tissue.tsv" --atlas="normal" --tissue="lung" --level="Not detected,Medium,High,Low" --reliability="Approved,Supported,Uncertain" --column_number="c1" --header="true" --not_mapped="false" --output="./test-data/ENSG_tissue_output.txt" |
5501e74891e4
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents:
diff
changeset
|
179 # Rscript sel_ann_hpa.R --input_type="file" --input="./test-data/ENSG_no_not_match.txt" --ref_file="/Users/LinCun/Documents/ProteoRE/usecase1/normal_tissue.csv" --atlas="normal" --tissue="lung" --level="Not detected,Medium,High,Low" --reliability="Approved,Supported,Uncertain" --column_number="c1" --header="true" --not_mapped="false" --output="./test-data/ENSG_tissue_output2.txt" |