Mercurial > repos > proteore > proteore_expression_rnaseq_abbased
diff get_data_HPA_v2.R @ 5:f15cdeeba4b4 draft
planemo upload commit 4af7ac25de19ca10b1654820e909c647a2d337b2-dirty
author | proteore |
---|---|
date | Mon, 19 Mar 2018 10:07:38 -0400 |
parents | cf2fa609625b |
children |
line wrap: on
line diff
--- a/get_data_HPA_v2.R Wed Mar 14 11:27:05 2018 -0400
+++ b/get_data_HPA_v2.R Mon Mar 19 10:07:38 2018 -0400
@@ -17,6 +17,26 @@
 # --output : output file name
 # Useful functions
+# Read file and return file content as data.frame
+readfile = function(filename, header) {
+  if (header == "true") {
+    # Read only first line of the file as header:
+    headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
+    #Read the data of the files (skipping the first row)
+    file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
+    # Remove empty rows
+    file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
+    #And assign the header to the data
+    names(file) <- headers
+  }
+  else {
+    file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
+    # Remove empty rows
+    file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
+  }
+  return(file)
+}
+
 '%!in%' <- function(x,y)!('%in%'(x,y))
 args = commandArgs(trailingOnly = TRUE)
@@ -52,9 +72,9 @@
 if (typeinput=="tabfile"){
   if (header=="TRUE"){
-    listfile = read.table(listfile,header=TRUE,sep="\t",quote="\"",fill=TRUE, na.strings=c("","NA"))
+    listfile = readfile(listfile, "true")
   }else{
-    listfile = read.table(listfile,header=FALSE,sep="\t",quote="\"",fill=TRUE, na.strings=c("","NA"))
+    listfile = readfile(listfile, "false")
   }
   sample = listfile[,column]
@@ -86,7 +106,7 @@
 # the file with the fields "Protein not found in proteinatlas"
 if (length(which(sample %!in% proteinatlas[,3]))!=0){
   proteins_not_found = as.data.frame(sample[which(sample %!in% proteinatlas[,3])])
-  proteins_not_found = cbind(proteins_not_found,matrix(rep("Protein not found in 
HPA",length(proteins_not_found)),nrow=length(proteins_not_found),ncol=length(colnames(data))-1))
+  proteins_not_found = cbind(proteins_not_found,matrix(rep("Protein not found in HPA",length(proteins_not_found)),nrow=length(proteins_not_found),ncol=length(colnames(data))-1))
   colnames(proteins_not_found)=colnames(data)