# HG changeset patch # User proteore # Date 1521543706 14400 # Node ID 69cf9e6283f8295bc9509c12f3d7aa4ce0ac5b2b # Parent f294fd77b143afb83456eef62768a983abbdfe17 planemo upload commit 1aa1bc2601a18344f518f8852ed8f1b0a36ae8b9-dirty diff -r f294fd77b143 -r 69cf9e6283f8 sel_ann_hpa.R --- a/sel_ann_hpa.R Wed Mar 14 12:22:51 2018 -0400 +++ b/sel_ann_hpa.R Tue Mar 20 07:01:46 2018 -0400 @@ -3,16 +3,18 @@ readfile = function(filename, header) { if (header == "true") { # Read only first line of the file as header: - headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) - #Read the data of the files (skipping the first row): - file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) + headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") + #Read the data of the files (skipping the first row) + file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") # Remove empty rows - #file <- file[!apply(is.na(file) | file == "", 1, all),] - #And assign the header to the data: + file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] + #And assign the header to the data names(file) <- headers } else { - file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) + file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") + # Remove empty rows + file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] } return(file) } @@ -133,7 +135,7 @@ # Extract input input_type = args$input_type if (input_type == "list") { - list_id = strsplit(args$input, " ")[[1]] + list_id = strsplit(args$input, " +")[[1]] } else if (input_type == "file") { filename = args$input @@ -172,8 +174,3 @@ } main() - -# Example commands -# Rscript sel_ann_hpa.R --input_type="file" --input="./test-data/ENSGid.txt" --ref_file="./pathology.tsv" --cancer="lung cancer,carcinoid" --not_mapped="true" --column_number="c1" --header="true" --output="test-data/ENSG_tissue_output_cancer.txt" -# Rscript sel_ann_hpa.R --input_type="file" --input="./test-data/ENSGid.txt" --ref_file="./normal_tissue.tsv" --tissue="lung" --level="Not detected,Medium,High,Low" --reliability="Approved,Supported,Uncertain" --column_number="c1" --header="true" --not_mapped="false" --output="./test-data/ENSG_tissue_output.txt" -# Rscript sel_ann_hpa.R --input_type="file" --input="./test-data/ENSG_no_not_match.txt" --ref_file="/Users/LinCun/Documents/ProteoRE/usecase1/normal_tissue.csv" --tissue="lung" --level="Not detected,Medium,High,Low" --reliability="Approved,Supportive,Uncertain" --column_number="c1" --header="true" --output="./test-data/ENSG_tissue_output2.txt" \ No newline at end of file