proteore_expression_rnaseq_abbased: get_data_HPA

comparison get_data_HPA_v2.R @ 5:f15cdeeba4b4 draft

planemo upload commit 4af7ac25de19ca10b1654820e909c647a2d337b2-dirty

author	proteore
date	Mon, 19 Mar 2018 10:07:38 -0400
parents	cf2fa609625b
children

comparison

equal deleted inserted replaced

-:2f95774977ff
+:f15cdeeba4b4
 #	--column : column of the input containing the ENSG identifiers
 #	--select : information from HPA to select, may be
 #	: RNA.tissue.category,Reliability..IH.,Reliability..IF. (comma-separated)
 # --output : output file name
 # Useful functions
+# Read a tab-separated file and return its content as a data.frame. `header` is a string flag: "true" means the first line holds the column names; any other value means every line is read as data.
+readfile = function(filename, header) {
+if (header == "true") {  # compared against the lowercase string "true", not a logical
+# Read only the first line of the file; it is used as the header row (quote = "" disables quote handling):
+headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
+# Read the data rows of the file (skipping the first row); empty cells and "NA" are read as NA
+file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
+# Remove empty rows, i.e. rows in which every cell is NA or an empty string (drop=FALSE keeps a data.frame)
+file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
+# Assign the values of the header row read above as the column names of the data
+names(file) <- headers
+}
+else {  # no header line: the whole file is read as data and no column names are assigned here
+file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
+# Remove empty rows, i.e. rows in which every cell is NA or an empty string (drop=FALSE keeps a data.frame)
+file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
+}
+return(file)  # data.frame holding the non-empty rows of the file
+}
 # Negated membership operator: for each element of x, TRUE when it has no
 # match in y and FALSE when it does.
 `%!in%` <- function(x, y) {
   match(x, y, nomatch = 0L) == 0L
 }
 args = commandArgs(trailingOnly = TRUE)
 sample = sample[,column]
 }
 if (typeinput=="tabfile"){
 if (header=="TRUE"){
-listfile = read.table(listfile,header=TRUE,sep="\t",quote="\"",fill=TRUE, na.strings=c("","NA"))
+listfile = readfile(listfile, "true")
 }else{
-listfile = read.table(listfile,header=FALSE,sep="\t",quote="\"",fill=TRUE, na.strings=c("","NA"))
+listfile = readfile(listfile, "false")
 }
 sample = listfile[,column]
 }
 data = data[,to_keep]
 # if some of the proteins were not found in proteinatlas, they are added to
 # the file with their remaining fields set to "Protein not found in HPA"
 if (length(which(sample %!in% proteinatlas[,3]))!=0){
 proteins_not_found = as.data.frame(sample[which(sample %!in% proteinatlas[,3])])
-	proteins_not_found = cbind(proteins_not_found,matrix(rep("Protein not found in HPA",length(proteins_not_found)),nrow=length(proteins_not_found),ncol=length(colnames(data))-1))
+	  proteins_not_found = cbind(proteins_not_found,matrix(rep("Protein not found in HPA",length(proteins_not_found)),nrow=length(proteins_not_found),ncol=length(colnames(data))-1))
 colnames(proteins_not_found)=colnames(data)
 data = rbind(data,proteins_not_found)
 }

Mercurial > repos > proteore > proteore_expression_rnaseq_abbased

comparison get_data_HPA_v2.R @ 5:f15cdeeba4b4 draft