# sel_ann_hpa.R
# Recovered from a Mercurial annotate view, revision 0:5501e74891e4 (draft).
# planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
# Author: proteore; date: Fri, 16 Feb 2018 04:22:54 -0500
# Parents: none; children: 69cf9e6283f8

# Read a tab-separated file and return its content as a data.frame.
#
# Arguments:
#   filename: path of the tab-separated file to read.
#   header:   the string "true" when the first line of the file holds the
#             column names; any other value (including NULL) means the file
#             has no header line and default V1, V2, ... names are kept.
#
# Returns:
#   A data.frame with the file content (strings are kept as character).
readfile <- function(filename, header) {
  # isTRUE() keeps a missing (NULL) or NA header option from crashing the test
  if (isTRUE(header == "true")) {
    # Read only the first line of the file as header. The fields are forced to
    # character so that labels such as "T", "007" or "1e3" are kept verbatim
    # instead of being converted to TRUE, 7 or 1000.
    headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t",
                          stringsAsFactors = FALSE, fill = TRUE,
                          colClasses = "character")
    column_names <- unlist(headers[1, ], use.names = FALSE)
    # Empty or "NA" header cells keep the label "NA", as they always did
    column_names[is.na(column_names) | column_names == ""] <- "NA"

    # Read the data of the file (skipping the first row)
    file <- read.table(filename, skip = 1, header = FALSE, sep = "\t",
                       stringsAsFactors = FALSE, fill = TRUE)

    # And assign the header to the data
    names(file) <- column_names
  } else {
    file <- read.table(filename, header = FALSE, sep = "\t",
                       stringsAsFactors = FALSE, fill = TRUE)
  }
  return(file)
}

# ---- Annotation against the HPA normal tissue table ----
# Select the rows of the HPA normal tissue table that match a list of genes
# and a set of tissue / level / reliability filters.
#
# Arguments:
#   input:             vector of IDs in ENSG format, matched against the
#                      "Gene" column of HPA_normal_tissue
#   HPA_normal_tissue: data.frame with (at least) the columns Gene, Tissue,
#                      Level and Reliability; the ID of every appended row is
#                      written in its first column
#   tissue:            one or several of unique(HPA.normal.tissue$Tissue)
#   level:             one or several of "Not detected", "Medium", "High", "Low"
#   reliability:       one or several of "Approved", "Supported", "Uncertain"
#   not_mapped_option: "true" to append one row per input ID that produced no
#                      result row; any other value (or NULL) appends nothing
#
# A zero-length tissue, level or reliability vector disables that filter
# (= ALL) instead of failing.
#
# Returns:
#   A data.frame with the columns of HPA_normal_tissue. With
#   not_mapped_option == "true", IDs present in the table but removed by the
#   filters get a row filled with "not match", and IDs absent from the table
#   get a row filled with NA.
annot.HPAnorm <- function(input, HPA_normal_tissue, tissue, level, reliability, not_mapped_option) {
  required_cols <- c("Gene", "Tissue", "Level", "Reliability")
  absent_cols <- setdiff(required_cols, colnames(HPA_normal_tissue))
  if (length(absent_cols) > 0) {
    stop("HPA normal tissue table lacks column(s): ",
         paste(absent_cols, collapse = ", "), call. = FALSE)
  }

  # Keep the rows whose `column` value is one of `wanted`; %in% covers both the
  # single-value and the multi-value case, and an empty selection keeps all.
  keep_rows <- function(df, column, wanted) {
    if (length(wanted) == 0) {
      return(df)
    }
    df[df[[column]] %in% wanted, , drop = FALSE]
  }

  # Build one row per ID: the ID in the first column, `fill` everywhere else.
  filler_rows <- function(ids, fill) {
    rows <- cbind(matrix(ids, ncol = 1),
                  matrix(fill, nrow = length(ids),
                         ncol = ncol(HPA_normal_tissue) - 1))
    rows <- as.data.frame(rows, stringsAsFactors = FALSE)
    names(rows) <- colnames(HPA_normal_tissue)
    rows
  }

  dat <- HPA_normal_tissue[HPA_normal_tissue$Gene %in% input, , drop = FALSE]

  res.Tissue <- keep_rows(dat, "Tissue", tissue)
  res.Level <- keep_rows(res.Tissue, "Level", level)
  res.Rel <- keep_rows(res.Level, "Reliability", reliability)

  res <- res.Rel
  # isTRUE() tolerates a missing (NULL) option
  if (isTRUE(not_mapped_option == "true")) {
    mapped_ids <- unique(dat$Gene)

    # IDs found in the table but with no row left after filtering
    # (intersect() keeps them in the order of `input`)
    not_match_ids <- setdiff(intersect(input, mapped_ids), unique(res.Rel$Gene))
    if (length(not_match_ids) > 0) {
      res <- rbind(res, filler_rows(not_match_ids, "not match"))
    }

    # IDs that do not exist in the table at all
    not_mapped_ids <- setdiff(input, mapped_ids)
    if (length(not_mapped_ids) > 0) {
      res <- rbind(res, filler_rows(not_mapped_ids, NA))
    }
  }

  return(res)
}

# ---- Annotation against the HPA cancer tissue table ----
# Select the rows of the HPA cancer tissue table that match a list of genes
# and a set of cancer types.
#
# Arguments:
#   input:             vector of IDs, matched against the "Gene" column of
#                      HPA_cancer_tissue
#   HPA_cancer_tissue: data.frame with (at least) the columns Gene and Cancer;
#                      the ID of every appended row is written in its first
#                      column
#   cancer:            one or several values of the "Cancer" column; a
#                      zero-length vector disables the filter instead of
#                      failing
#   not_mapped_option: "true" to append one NA-filled row per input ID that is
#                      absent from the table; any other value (or NULL)
#                      appends nothing
#
# Returns:
#   A data.frame with the columns of HPA_cancer_tissue. Unlike annot.HPAnorm,
#   no "not match" row is added for IDs that exist in the table but not for
#   the selected cancers.
annot.HPAcancer <- function(input, HPA_cancer_tissue, cancer, not_mapped_option) {
  absent_cols <- setdiff(c("Gene", "Cancer"), colnames(HPA_cancer_tissue))
  if (length(absent_cols) > 0) {
    stop("HPA cancer tissue table lacks column(s): ",
         paste(absent_cols, collapse = ", "), call. = FALSE)
  }

  dat <- HPA_cancer_tissue[HPA_cancer_tissue$Gene %in% input, , drop = FALSE]

  # %in% covers both a single cancer and several; no selection keeps all rows
  if (length(cancer) == 0) {
    res.Cancer <- dat
  } else {
    res.Cancer <- dat[dat$Cancer %in% cancer, , drop = FALSE]
  }

  res <- res.Cancer
  # isTRUE() tolerates a missing (NULL) option
  if (isTRUE(not_mapped_option == "true")) {
    not_mapped_ids <- setdiff(input, unique(dat$Gene))
    # Only bind when there is something to add, so that the column types of
    # the result are left untouched otherwise
    if (length(not_mapped_ids) > 0) {
      not.mapped <- cbind(matrix(not_mapped_ids, ncol = 1),
                          matrix(NA, nrow = length(not_mapped_ids),
                                 ncol = ncol(HPA_cancer_tissue) - 1))
      not.mapped <- as.data.frame(not.mapped, stringsAsFactors = FALSE)
      names(not.mapped) <- colnames(HPA_cancer_tissue)
      res <- rbind(res, not.mapped)
    }
  }
  return(res)
}

# ---- Command line entry point ----
# Command line entry point: parse the "--name=value" arguments, collect the
# input IDs (from a space-separated list or from one column of a file), read
# the HPA reference table, run the normal or cancer annotation and write the
# result as a tab-separated file.
#
# Without any argument, or with "--help", the usage text is printed and R
# quits. Missing or inconsistent arguments stop with an explicit message.
main <- function() {
  args <- commandArgs(TRUE)
  if (length(args) < 1) {
    args <- c("--help")
  }

  # Help section
  if ("--help" %in% args) {
    cat("Selection and Annotation HPA
    Arguments:
        --ref_file: HPA normal/cancer tissue file path
        --input_type: type of input (list of id or filename)
        --input: list of IDs in ENSG format
        --column_number: the column number which you would like to apply...
        --header: true/false if your file contains a header
        --atlas: normal/cancer
            if normal:
                --tissue: list of tissues
                --level: Not detected, Low, Medium, High
                --reliability: Supportive, Uncertain
            if cancer:
                --cancer: Cancer tissues
        --not_mapped: true/false if your output file should contain not-mapped and not-match IDs
        --output: output filename \n")
    q(save = "no")
  }

  # Parse arguments. Each one is split on its FIRST "=" only, so that values
  # containing "=" (paths, for instance) are kept whole.
  eq_pos <- regexpr("=", args, fixed = TRUE)
  if (any(eq_pos < 0)) {
    warning("Ignoring argument(s) not of the form --name=value: ",
            paste(args[eq_pos < 0], collapse = " "), call. = FALSE)
    args <- args[eq_pos > 0]
    eq_pos <- eq_pos[eq_pos > 0]
  }
  arg_names <- sub("^--", "", substr(args, 1, eq_pos - 1))
  args <- as.list(substr(args, eq_pos + 1, nchar(args)))
  names(args) <- arg_names

  # Fail early, with a readable message, when a mandatory option is absent
  require_arg <- function(name) {
    if (is.null(args[[name]])) {
      stop("Missing required argument --", name, call. = FALSE)
    }
    args[[name]]
  }

  # Extract input
  input_type <- require_arg("input_type")
  if (input_type == "list") {
    list_id <- strsplit(require_arg("input"), " ")[[1]]
    # Consecutive spaces would otherwise produce empty IDs
    list_id <- list_id[nzchar(list_id)]
  } else if (input_type == "file") {
    filename <- require_arg("input")
    column_number <- as.numeric(gsub("c", "", require_arg("column_number")))
    # The header option defaults to "false" when it is not given
    header <- if (is.null(args$header)) "false" else args$header
    file <- readfile(filename, header)
    if (is.na(column_number) || column_number < 1 || column_number > ncol(file)) {
      stop("--column_number must designate one of the ", ncol(file),
           " column(s) of the input file", call. = FALSE)
    }
    # Only the first ID of a ";"-separated cell is used
    list_id <- vapply(strsplit(as.character(file[, column_number]), ";"),
                      "[", character(1), 1)
  } else {
    stop("--input_type must be \"list\" or \"file\", got: ", input_type,
         call. = FALSE)
  }
  input <- list_id

  # Read reference file
  reference_file <- read.table(require_arg("ref_file"), header = TRUE, sep = "\t",
                               stringsAsFactors = FALSE, fill = TRUE)

  # Extract other options
  atlas <- args$atlas
  if (is.null(atlas)) {
    # Without --atlas, the presence of --cancer decides which atlas is meant
    atlas <- if (is.null(args$cancer)) "normal" else "cancer"
  }
  not_mapped_option <- if (is.null(args$not_mapped)) "false" else args$not_mapped

  if (atlas == "normal") {
    tissue <- strsplit(require_arg("tissue"), ",")[[1]]
    level <- strsplit(require_arg("level"), ",")[[1]]
    reliability <- strsplit(require_arg("reliability"), ",")[[1]]
    # Calculation
    res <- annot.HPAnorm(input, reference_file, tissue, level, reliability,
                         not_mapped_option)
  } else if (atlas == "cancer") {
    cancer <- strsplit(require_arg("cancer"), ",")[[1]]
    # Calculation
    res <- annot.HPAcancer(input, reference_file, cancer, not_mapped_option)
  } else {
    stop("--atlas must be \"normal\" or \"cancer\", got: ", atlas, call. = FALSE)
  }

  # Write output
  output <- require_arg("output")
  write.table(res, output, sep = "\t", quote = FALSE, row.names = FALSE)
}

# ---- Script execution ----
# Run the tool only when the file is executed as a script (Rscript): main()
# calls q() in its help branch, which would close an interactive session
# that merely source()s this file.
if (!interactive()) {
  main()
}

# Example commands
# Rscript sel_ann_hpa.R --input_type="file" --input="./test-data/ENSGid.txt" --ref_file="./pathology.tsv" --atlas="cancer" --cancer="lung cancer,carcinoid" --not_mapped="true" --column_number="c1" --header="true" --output="test-data/ENSG_tissue_output_cancer.txt"
# Rscript sel_ann_hpa.R --input_type="file" --input="./test-data/ENSGid.txt" --ref_file="./normal_tissue.tsv" --atlas="normal" --tissue="lung" --level="Not detected,Medium,High,Low" --reliability="Approved,Supported,Uncertain" --column_number="c1" --header="true" --not_mapped="false" --output="./test-data/ENSG_tissue_output.txt"
# Rscript sel_ann_hpa.R --input_type="file" --input="./test-data/ENSG_no_not_match.txt" --ref_file="/Users/LinCun/Documents/ProteoRE/usecase1/normal_tissue.csv" --atlas="normal" --tissue="lung" --level="Not detected,Medium,High,Low" --reliability="Approved,Supportive,Uncertain" --column_number="c1" --header="true" --not_mapped="false" --output="./test-data/ENSG_tissue_output2.txt"