Mercurial > repos > proteore > proteore_expression_rnaseq_abbased
comparison get_data_HPA_v2.R @ 5:f15cdeeba4b4 draft
planemo upload commit 4af7ac25de19ca10b1654820e909c647a2d337b2-dirty
author | proteore |
---|---|
date | Mon, 19 Mar 2018 10:07:38 -0400 |
parents | cf2fa609625b |
children |
comparison
equal
deleted
inserted
replaced
4:2f95774977ff | 5:f15cdeeba4b4 |
---|---|
14 # --column : column of the input containing the ENSG identifiers | 14 # --column : column of the input containing the ENSG identifiers |
15 # --select : information from HPA to select, may be | 15 # --select : information from HPA to select, may be |
16 # : RNA.tissue.category,Reliability..IH.,Reliability..IF. (comma-separated) | 16 # : RNA.tissue.category,Reliability..IH.,Reliability..IF. (comma-separated) |
17 # --output : output file name | 17 # --output : output file name |
18 # Useful functions | 18 # Useful functions |
19 | |
20 # Read file and return file content as data.frame | |
21 readfile = function(filename, header) { | |
22 if (header == "true") { | |
23 # Read only first line of the file as header: | |
24 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") | |
25 # Read the data of the file (skipping the first row) | |
26 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") | |
27 # Remove empty rows | |
28 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] | |
29 #And assign the header to the data | |
30 names(file) <- headers | |
31 } | |
32 else { | |
33 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") | |
34 # Remove empty rows | |
35 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] | |
36 } | |
37 return(file) | |
38 } | |
19 | 39 |
20 '%!in%' <- function(x,y)!('%in%'(x,y)) | 40 '%!in%' <- function(x,y)!('%in%'(x,y)) |
21 | 41 |
22 args = commandArgs(trailingOnly = TRUE) | 42 args = commandArgs(trailingOnly = TRUE) |
23 | 43 |
50 sample = sample[,column] | 70 sample = sample[,column] |
51 } | 71 } |
52 if (typeinput=="tabfile"){ | 72 if (typeinput=="tabfile"){ |
53 | 73 |
54 if (header=="TRUE"){ | 74 if (header=="TRUE"){ |
55 listfile = read.table(listfile,header=TRUE,sep="\t",quote="\"",fill=TRUE, na.strings=c("","NA")) | 75 listfile = readfile(listfile, "true") |
56 }else{ | 76 }else{ |
57 listfile = read.table(listfile,header=FALSE,sep="\t",quote="\"",fill=TRUE, na.strings=c("","NA")) | 77 listfile = readfile(listfile, "false") |
58 } | 78 } |
59 sample = listfile[,column] | 79 sample = listfile[,column] |
60 | 80 |
61 } | 81 } |
62 | 82 |
84 data = data[,to_keep] | 104 data = data[,to_keep] |
85 # if some of the proteins were not found in proteinatlas, they are appended to | 105 # if some of the proteins were not found in proteinatlas, they are appended to |
86 # the data with the remaining fields set to "Protein not found in HPA" | 106 # the data with the remaining fields set to "Protein not found in HPA" |
87 if (length(which(sample %!in% proteinatlas[,3]))!=0){ | 107 if (length(which(sample %!in% proteinatlas[,3]))!=0){ |
88 proteins_not_found = as.data.frame(sample[which(sample %!in% proteinatlas[,3])]) | 108 proteins_not_found = as.data.frame(sample[which(sample %!in% proteinatlas[,3])]) |
89 proteins_not_found = cbind(proteins_not_found,matrix(rep("Protein not found in HPA",length(proteins_not_found)),nrow=length(proteins_not_found),ncol=length(colnames(data))-1)) | 109 proteins_not_found = cbind(proteins_not_found,matrix(rep("Protein not found in HPA",length(proteins_not_found)),nrow=length(proteins_not_found),ncol=length(colnames(data))-1)) |
90 | 110 |
91 colnames(proteins_not_found)=colnames(data) | 111 colnames(proteins_not_found)=colnames(data) |
92 | 112 |
93 data = rbind(data,proteins_not_found) | 113 data = rbind(data,proteins_not_found) |
94 } | 114 } |