# HG changeset patch
# User proteore
# Date 1521468458 14400
# Node ID f15cdeeba4b49cf8fa18b142fcd32838e6026fed
# Parent 2f95774977ff0ed6eafbb57d9f3f16cbc4621bcf
planemo upload commit 4af7ac25de19ca10b1654820e909c647a2d337b2-dirty
diff -r 2f95774977ff -r f15cdeeba4b4 expression_rnaseq_abbased.xml
--- a/expression_rnaseq_abbased.xml Wed Mar 14 11:27:05 2018 -0400
+++ b/expression_rnaseq_abbased.xml Mon Mar 19 10:07:38 2018 -0400
@@ -12,11 +12,11 @@
#if $inputtype.filetype == "copy_paste":
- Rscript --vanilla $__tool_directory__/get_data_HPA_v2.R --inputtype copypaste --input '$inputtype.genelist' --header FALSE --proteinatlas $__tool_directory__/proteinatlas.csv --column c1 --select '$options.hpaparams' --output '$output'
+ Rscript $__tool_directory__/add_expression_HPA.R --inputtype="copypaste" --input='$inputtype.genelist' --atlas="$__tool_directory__/proteinatlas.csv" --select='$options.hpaparams' --output='$output'
#else
- Rscript --vanilla $__tool_directory__/get_data_HPA_v2.R --inputtype tabfile --input '$inputtype.genelist' --header '$inputtype.header' --proteinatlas $__tool_directory__/proteinatlas.csv --column '$inputtype.column' --select '$options.hpaparams' --output '$output'
+ Rscript $__tool_directory__/add_expression_HPA.R --inputtype="tabfile" --input='$inputtype.genelist' --header='$inputtype.header' --atlas="$__tool_directory__/proteinatlas.csv" --column='$inputtype.column' --select='$options.hpaparams' --output='$output'
#end if
@@ -36,23 +36,23 @@
-
-
+
+
-
+
-
-
-
-
-
-
+
+
+
+
+
+
diff -r 2f95774977ff -r f15cdeeba4b4 get_data_HPA_v2.R
--- a/get_data_HPA_v2.R Wed Mar 14 11:27:05 2018 -0400
+++ b/get_data_HPA_v2.R Mon Mar 19 10:07:38 2018 -0400
@@ -17,6 +17,26 @@
# --output : output file name
# Useful functions
+# Read a tab-separated file into a data.frame; header == "true" means the first line holds the column names.
+readfile = function(filename, header) {
+ if (header == "true") {
+ # Read only the first line of the file; it is used below as the column names:
+ headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
+ # Read the data rows of the file (skipping the first line)
+ file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
+ # Remove rows in which every cell is NA or an empty string
+ file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
+ # Assign the header line read above as the column names of the data
+ names(file) <- headers
+ }
+ else {
+ file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
+ # Remove rows in which every cell is NA or an empty string
+ file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
+ }
+ return(file)
+}
+
'%!in%' <- function(x,y)!('%in%'(x,y))
args = commandArgs(trailingOnly = TRUE)
@@ -52,9 +72,9 @@
if (typeinput=="tabfile"){
if (header=="TRUE"){
- listfile = read.table(listfile,header=TRUE,sep="\t",quote="\"",fill=TRUE, na.strings=c("","NA"))
+ listfile = readfile(listfile, "true")
}else{
- listfile = read.table(listfile,header=FALSE,sep="\t",quote="\"",fill=TRUE, na.strings=c("","NA"))
+ listfile = readfile(listfile, "false")
}
sample = listfile[,column]
@@ -86,7 +106,7 @@
# the file with the fields "Protein not found in proteinatlas"
if (length(which(sample %!in% proteinatlas[,3]))!=0){
proteins_not_found = as.data.frame(sample[which(sample %!in% proteinatlas[,3])])
- proteins_not_found = cbind(proteins_not_found,matrix(rep("Protein not found in HPA",length(proteins_not_found)),nrow=length(proteins_not_found),ncol=length(colnames(data))-1))
+ proteins_not_found = cbind(proteins_not_found,matrix(rep("Protein not found in HPA",length(proteins_not_found)),nrow=length(proteins_not_found),ncol=length(colnames(data))-1))
colnames(proteins_not_found)=colnames(data)