comparison protein_features.R @ 6:fc0118aa432a draft

planemo upload commit 5221e042cb207f593b144ed857106235b8f5fbde-dirty
author proteore
date Tue, 20 Mar 2018 11:13:49 -0400
parents 867d47ff782c
children af7089d1c7c0
comparison
equal deleted inserted replaced
5:867d47ff782c 6:fc0118aa432a
1 # Read file and return file content as data.frame 1 # Read file and return file content as data.frame
2 readfile = function(filename, header) { 2 readfile = function(filename, header) {
3 if (header == "true") { 3 if (header == "true") {
4 # Read only first line of the file as header: 4 # Read only first line of the file as header:
5 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) 5 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
6 #Read the data of the files (skipping the first row) 6 #Read the data of the files (skipping the first row)
7 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) 7 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
8 # Remove empty rows 8 # Remove empty rows
9 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] 9 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
10 #And assign the header to the data 10 #And assign the header to the data
11 names(file) <- headers 11 names(file) <- headers
12 } 12 }
13 else { 13 else {
14 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) 14 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
15 # Remove empty rows 15 # Remove empty rows
16 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] 16 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
17 } 17 }
18 return(file) 18 return(file)
19 } 19 }
47 args <- as.list(as.character(argsDF$V2)) 47 args <- as.list(as.character(argsDF$V2))
48 names(args) <- argsDF$V1 48 names(args) <- argsDF$V1
49 49
50 inputtype = args$inputtype 50 inputtype = args$inputtype
51 if (inputtype == "copypaste") { 51 if (inputtype == "copypaste") {
52 input = strsplit(args$input, " ")[[1]] 52 input = strsplit(args$input, "[ \t\n]+")[[1]]
53 } 53 }
54 else if (inputtype == "tabfile") { 54 else if (inputtype == "tabfile") {
55 filename = args$input 55 filename = args$input
56 ncol = args$column 56 ncol = args$column
57 # Check ncol 57 # Check ncol
88 input = gsub("^","NX_",input) 88 input = gsub("^","NX_",input)
89 } 89 }
90 90
91 # Select user input protein ids in nextprot 91 # Select user input protein ids in nextprot
92 if ((length(input[input %in% nextprot[,1]]))==0){ 92 if ((length(input[input %in% nextprot[,1]]))==0){
93 write.table("None of the input ids are can be found in Nextprot",file=filename,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) 93 write.table("None of the input ids are can be found in Nextprot",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE)
94 } else { 94 } else {
95 names = c() 95 names = c()
96 res = matrix(nrow=length(input), ncol=0) 96 res = matrix(nrow=length(input), ncol=0)
97 97
98 # Get information from neXtProt 98 # Get information from neXtProt
115 names = c(names, arg) 115 names = c(names, arg)
116 info = nextprot[match(input, nextprot["NextprotID"][,]),][arg][,] 116 info = nextprot[match(input, nextprot["NextprotID"][,]),][arg][,]
117 res = cbind(res, info) 117 res = cbind(res, info)
118 } 118 }
119 } 119 }
120 --inputtype="tabfile" --input="Galaxy50-[ID_Converter_on_data_47].tabular" --header='true' --natlas="proteinatlas.csv" --column='c7' --select='Gene,Gene.description,Evidence,RNA.tissue.category,Reliability.IH,TPM.max.in.non.specific' --output="test.txt"
120 121
121 # Write output 122 # Write output
122 if (inputtype == "copypaste") { 123 if (inputtype == "copypaste") {
123 res = cbind(as.matrix(input), res) 124 res = cbind(as.matrix(input), res)
124 names = c(typeid, names) 125 names = c(typeid, names)