# HG changeset patch
# User proteore
# Date 1521543706 14400
# Node ID 69cf9e6283f8295bc9509c12f3d7aa4ce0ac5b2b
# Parent  f294fd77b143afb83456eef62768a983abbdfe17
planemo upload commit 1aa1bc2601a18344f518f8852ed8f1b0a36ae8b9-dirty

diff -r f294fd77b143 -r 69cf9e6283f8 sel_ann_hpa.R
--- a/sel_ann_hpa.R	Wed Mar 14 12:22:51 2018 -0400
+++ b/sel_ann_hpa.R	Tue Mar 20 07:01:46 2018 -0400
@@ -3,16 +3,18 @@
 readfile = function(filename, header) {
   if (header == "true") {
     # Read only first line of the file as header:
-    headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
-    #Read the data of the files (skipping the first row):
-    file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
+    headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
+    # Read the data rows of the file (skipping the header row)
+    file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
     # Remove empty rows
-    #file <- file[!apply(is.na(file) | file == "", 1, all),]
-    #And assign the header to the data:
+    file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
+    # Assign the header names to the data columns
     names(file) <- headers
   }
   else {
-    file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
+    file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "")
+    # Remove empty rows
+    file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE]
   }
   return(file)
 }
@@ -133,7 +135,7 @@
   # Extract input
   input_type = args$input_type
   if (input_type == "list") {
-    list_id = strsplit(args$input, " ")[[1]]
+    list_id = strsplit(args$input, " +")[[1]]
   }
   else if (input_type == "file") {
     filename = args$input
@@ -172,8 +174,3 @@
 }
 
 main()
-
-# Example commands
-# Rscript sel_ann_hpa.R --input_type="file" --input="./test-data/ENSGid.txt" --ref_file="./pathology.tsv" --cancer="lung cancer,carcinoid" --not_mapped="true" --column_number="c1" --header="true" --output="test-data/ENSG_tissue_output_cancer.txt"
-# Rscript sel_ann_hpa.R --input_type="file" --input="./test-data/ENSGid.txt" --ref_file="./normal_tissue.tsv" --tissue="lung" --level="Not detected,Medium,High,Low" --reliability="Approved,Supported,Uncertain" --column_number="c1" --header="true" --not_mapped="false" --output="./test-data/ENSG_tissue_output.txt"
-# Rscript sel_ann_hpa.R --input_type="file" --input="./test-data/ENSG_no_not_match.txt" --ref_file="/Users/LinCun/Documents/ProteoRE/usecase1/normal_tissue.csv" --tissue="lung" --level="Not detected,Medium,High,Low" --reliability="Approved,Supportive,Uncertain" --column_number="c1" --header="true" --output="./test-data/ENSG_tissue_output2.txt"
\ No newline at end of file