Mercurial > repos > proteore > proteore_prot_features
comparison protein_features.R @ 6:fc0118aa432a draft
planemo upload commit 5221e042cb207f593b144ed857106235b8f5fbde-dirty
author | proteore |
---|---|
date | Tue, 20 Mar 2018 11:13:49 -0400 |
parents | 867d47ff782c |
children | af7089d1c7c0 |
comparison
equal
deleted
inserted
replaced
5:867d47ff782c | 6:fc0118aa432a |
---|---|
1 # Read file and return file content as data.frame | 1 # Read file and return file content as data.frame |
2 readfile = function(filename, header) { | 2 readfile = function(filename, header) { |
3 if (header == "true") { | 3 if (header == "true") { |
4 # Read only first line of the file as header: | 4 # Read only first line of the file as header: |
5 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) | 5 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") |
6 #Read the data of the files (skipping the first row) | 6 #Read the data of the files (skipping the first row) |
7 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) | 7 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") |
8 # Remove empty rows | 8 # Remove empty rows |
9 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] | 9 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] |
10 #And assign the header to the data | 10 #And assign the header to the data |
11 names(file) <- headers | 11 names(file) <- headers |
12 } | 12 } |
13 else { | 13 else { |
14 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) | 14 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") |
15 # Remove empty rows | 15 # Remove empty rows |
16 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] | 16 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] |
17 } | 17 } |
18 return(file) | 18 return(file) |
19 } | 19 } |
47 args <- as.list(as.character(argsDF$V2)) | 47 args <- as.list(as.character(argsDF$V2)) |
48 names(args) <- argsDF$V1 | 48 names(args) <- argsDF$V1 |
49 | 49 |
50 inputtype = args$inputtype | 50 inputtype = args$inputtype |
51 if (inputtype == "copypaste") { | 51 if (inputtype == "copypaste") { |
52 input = strsplit(args$input, " ")[[1]] | 52 input = strsplit(args$input, "[ \t\n]+")[[1]] |
53 } | 53 } |
54 else if (inputtype == "tabfile") { | 54 else if (inputtype == "tabfile") { |
55 filename = args$input | 55 filename = args$input |
56 ncol = args$column | 56 ncol = args$column |
57 # Check ncol | 57 # Check ncol |
88 input = gsub("^","NX_",input) | 88 input = gsub("^","NX_",input) |
89 } | 89 } |
90 | 90 |
91 # Select user input protein ids in nextprot | 91 # Select user input protein ids in nextprot |
92 if ((length(input[input %in% nextprot[,1]]))==0){ | 92 if ((length(input[input %in% nextprot[,1]]))==0){ |
93 write.table("None of the input ids are can be found in Nextprot",file=filename,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) | 93 write.table("None of the input ids are can be found in Nextprot",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) |
94 } else { | 94 } else { |
95 names = c() | 95 names = c() |
96 res = matrix(nrow=length(input), ncol=0) | 96 res = matrix(nrow=length(input), ncol=0) |
97 | 97 |
98 # Get information from neXtProt | 98 # Get information from neXtProt |
115 names = c(names, arg) | 115 names = c(names, arg) |
116 info = nextprot[match(input, nextprot["NextprotID"][,]),][arg][,] | 116 info = nextprot[match(input, nextprot["NextprotID"][,]),][arg][,] |
117 res = cbind(res, info) | 117 res = cbind(res, info) |
118 } | 118 } |
119 } | 119 } |
120 --inputtype="tabfile" --input="Galaxy50-[ID_Converter_on_data_47].tabular" --header='true' --natlas="proteinatlas.csv" --column='c7' --select='Gene,Gene.description,Evidence,RNA.tissue.category,Reliability.IH,TPM.max.in.non.specific' --output="test.txt" | |
120 | 121 |
121 # Write output | 122 # Write output |
122 if (inputtype == "copypaste") { | 123 if (inputtype == "copypaste") { |
123 res = cbind(as.matrix(input), res) | 124 res = cbind(as.matrix(input), res) |
124 names = c(typeid, names) | 125 names = c(typeid, names) |