Mercurial > repos > proteore > proteore_ms_observation_pepatlas
annotate Get_ms-ms_observations.R @ 4:299eb4a62e0c draft
planemo upload commit 23c8d2008471f194131da1c7705c8bb0b6388236-dirty
author | proteore |
---|---|
date | Fri, 01 Feb 2019 11:48:49 -0500 |
parents | 6ab9d2778f04 |
children | e77c0f3e9bab |
rev | line source |
---|---|
2
6ab9d2778f04
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff
changeset
|
# Read a tab-separated file and return its content as a data.frame.
#
# Args:
#   path:   path of the file to read (anything read.csv accepts).
#   header: TRUE when the first line of the file holds the column names.
#
# Returns:
#   A data.frame; strings stay character and column names are left untouched
#   (check.names = FALSE).
#
# Raises:
#   An error naming `path`. When the file exists but cannot be read (empty
#   file, invalid `header`, ...) the underlying reason is included instead of
#   the generic "not found" message.
read_file <- function(path, header) {
  tryCatch(
    read.csv(path, header = header, sep = "\t", stringsAsFactors = FALSE,
             quote = "\"", check.names = FALSE),
    error = function(e) {
      # Only claim "not found" when that is really what happened.
      if (is.character(path) && length(path) == 1L && !file.exists(path)) {
        stop("File not found: ", path, call. = FALSE)
      }
      stop("Unable to read file '", path, "': ", conditionMessage(e),
           call. = FALSE)
    }
  )
}
6ab9d2778f04
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff
changeset
|
10 |
6ab9d2778f04
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff
changeset
|
# Convert a textual flag to a logical value.
#
# Args:
#   x: character value(s); the comparison is case-insensitive.
#
# Returns:
#   TRUE when "t" or "true" occurs in `x`, otherwise FALSE when "f" or
#   "false" occurs in `x`, otherwise NULL (value not recognised).
str2bool <- function(x) {
  lowered <- tolower(x)
  # The "true" spellings are tested first, so they win on mixed input.
  if (any(c("t", "true") %in% lowered)) {
    return(TRUE)
  }
  if (any(c("f", "false") %in% lowered)) {
    return(FALSE)
  }
  NULL
}
6ab9d2778f04
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff
changeset
|
20 |
6ab9d2778f04
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff
changeset
|
# Look up the number of observations of each accession in one atlas file.
#
# Args:
#   input:      vector of accession numbers to look up.
#   atlas_file: path of a tab-separated atlas file with a header line and
#               the columns "Uniprot_AC" and "nb_obs".
#
# Returns:
#   A vector as long as `input` holding the `nb_obs` value of the first atlas
#   row whose `Uniprot_AC` equals the accession, or NA when there is none
#   (this is a lookup through match(), not a sum over rows).
#
# Raises:
#   An error when a required column is absent from the atlas file; without
#   this check the lookup would silently yield NULL.
nb_obs_PeptideAtlas <- function(input, atlas_file) {
  atlas <- read_file(atlas_file, TRUE)

  missing_cols <- setdiff(c("Uniprot_AC", "nb_obs"), names(atlas))
  if (length(missing_cols) > 0) {
    stop("Atlas file '", atlas_file, "' lacks required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # [[ ]] matches the column name exactly, unlike `$`.
  atlas[["nb_obs"]][match(input, atlas[["Uniprot_AC"]])]
}
6ab9d2778f04
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff
changeset
|
26 |
6ab9d2778f04
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff
changeset
|
# Command-line entry point.
#
# Reads "--name=value" arguments from commandArgs(), collects the accession
# numbers either from --input itself (input_type "list") or from one column
# of a tab-separated file (input_type "file"), looks each accession up in
# every atlas file given by --atlas, and writes the result to --output as a
# tab-separated table with one column per atlas file.
#
# Recognised arguments: input_type, input, column, header, atlas, output.
# With no argument, or with --help, the usage text is printed and R quits.
#
# Raises:
#   An error when a required argument is missing or invalid, or when one of
#   the files cannot be read.
main <- function() {
  args <- commandArgs(TRUE)
  if (length(args) < 1) {
    args <- c("--help")
  }

  # Help section
  if ("--help" %in% args) {
    cat("Get MS/MS observations from PeptideAtlas
    Arguments:
        --input_type: type of input (list of id or filename)
        --input: input
        --column: column of the input file holding the ids, e.g. c1 (file input only)
        --header: true/false, whether the input file has a header (file input only)
        --atlas: list of file(s) path to use
        --output: text output filename \n")
    q(save = "no")
  }

  # Parse arguments: split each "--name=value" on the FIRST "=" only, so that
  # values (paths, ids) may themselves contain "=".
  stripped <- sub("^--", "", args)
  has_value <- grepl("=", stripped, fixed = TRUE)
  arg_values <- ifelse(has_value, sub("^[^=]*=", "", stripped), NA_character_)
  args <- as.list(arg_values)
  names(args) <- sub("=.*$", "", stripped)

  # Exact-name accessor; `args$input` could partially match "input_type".
  require_arg <- function(name) {
    value <- args[[name]]
    if (is.null(value) || is.na(value)) {
      stop("Missing required argument --", name, call. = FALSE)
    }
    value
  }

  # Extract input
  input_type <- require_arg("input_type")
  if (!(input_type %in% c("list", "file"))) {
    stop("--input_type must be 'list' or 'file'", call. = FALSE)
  }

  if (input_type == "list") {
    input <- strsplit(require_arg("input"), "[ \t\n]+")[[1]]
  } else {
    filename <- require_arg("input")

    # Check the column: accepts "c3" or "3"; anything that is not a positive
    # whole number is rejected with an explicit message.
    col_index <- suppressWarnings(
      as.numeric(gsub("c", "", require_arg("column")))
    )
    if (is.na(col_index) || col_index %% 1 != 0 || col_index < 1) {
      stop("Please enter an integer for column", call. = FALSE)
    }

    header <- str2bool(require_arg("header"))
    if (is.null(header)) {
      stop("--header must be 'true' or 'false'", call. = FALSE)
    }

    file <- read_file(filename, header)
    if (col_index > ncol(file)) {
      stop("Column ", col_index, " does not exist in ", filename,
           call. = FALSE)
    }

    # Keep only the first of several ";"-separated ids in each cell.
    input <- vapply(
      file[[col_index]],
      function(x) strsplit(as.character(x), ";")[[1]][1],
      character(1),
      USE.NAMES = FALSE
    )
  }

  output <- require_arg("output")

  # Atlas files: the output column label is the file name up to its first dot.
  paths <- strsplit(require_arg("atlas"), ",")[[1]]
  if (length(paths) < 1) {
    stop("--atlas must list at least one file path", call. = FALSE)
  }
  labels <- sub("\\..*$", "", basename(paths))

  # Annotations: one column per atlas. cbind() always yields a matrix, even
  # for a single id, where sapply() would collapse to a plain vector.
  res <- do.call(
    cbind,
    lapply(paths, function(p) nb_obs_PeptideAtlas(input, p))
  )
  colnames(res) <- labels

  # Write output
  if (input_type == "list") {
    res <- cbind(as.matrix(input), res)
    colnames(res)[1] <- "Uniprot accession number"
  } else {
    res <- cbind(file, res)
  }
  # Turn empty / single-space cells into NA.
  # NOTE(review): apply() coerces a data.frame to a character matrix first
  # (see ?as.matrix.data.frame); kept as-is so the written output is unchanged.
  res <- as.data.frame(apply(res, c(1, 2), function(x) gsub("^$|^ $", NA, x)))
  write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE)
}
6ab9d2778f04
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff
changeset
|
103 |
6ab9d2778f04
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff
changeset
|
104 main() |
6ab9d2778f04
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff
changeset
|
105 # Example: Rscript Get_ms-ms_observations.R --input_type="file" --input="test-data/FKW_Lacombe_et_al_2017_OK.txt" --column="c1" --header="true" --atlas="Human_Brain_201803_PeptideAtlas.txt,Human_Urine_201803_PeptideAtlas.txt" --output="test-data/PeptideAtlas_output.txt" |
6ab9d2778f04
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b
proteore
parents:
diff
changeset
|
106 |