Mercurial > repos > proteore > proteore_ms_observation_pepatlas
changeset 6:e77c0f3e9bab draft
planemo upload commit 22be35397ac49f1368a77fb1515f2293a41c14b3-dirty
author | proteore |
---|---|
date | Thu, 02 May 2019 05:08:06 -0400 |
parents | 034cfb96a482 |
children | 3e9ea4797b00 |
files | Get_ms-ms_observations.R Get_ms-ms_observations.xml |
diffstat | 2 files changed, 57 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/Get_ms-ms_observations.R Wed Mar 13 09:34:37 2019 -0400 +++ b/Get_ms-ms_observations.R Thu May 02 05:08:06 2019 -0400 @@ -18,12 +18,64 @@ } } +#take data frame, return data frame +split_ids_per_line <- function(line,ncol){ + + #print (line) + header = colnames(line) + line[ncol] = gsub("[[:blank:]]","",line[ncol]) + + if (length(unlist(strsplit(as.character(line[ncol]),";")))>1) { + if (length(line)==1 ) { + lines = as.data.frame(unlist(strsplit(as.character(line[ncol]),";")),stringsAsFactors = F) + } else { + if (ncol==1) { #first column + lines = suppressWarnings(cbind(unlist(strsplit(as.character(line[ncol]),";")), line[2:length(line)])) + } else if (ncol==length(line)) { #last column + lines = suppressWarnings(cbind(line[1:ncol-1],unlist(strsplit(as.character(line[ncol]),";")))) + } else { + lines = suppressWarnings(cbind(line[1:ncol-1], unlist(strsplit(as.character(line[ncol]),";"),use.names = F), line[(ncol+1):length(line)])) + } + } + colnames(lines)=header + return(lines) + } else { + return(line) + } +} + +#create new lines if there's more than one id per cell in the column in order to have only one id per line +one_id_one_line <-function(tab,ncol){ + if (ncol(tab)>1){ + tab[,ncol] = sapply(tab[,ncol],function(x) gsub("[[:blank:]]","",x)) + header=colnames(tab) + res=as.data.frame(matrix(ncol=ncol(tab),nrow=0)) + for (i in 1:nrow(tab) ) { + lines = split_ids_per_line(tab[i,],ncol) + res = rbind(res,lines) + } + }else { + res = unlist(sapply(tab[,1],function(x) strsplit(x,";")),use.names = F) + res = data.frame(res[which(!is.na(res[res!=""]))],stringsAsFactors = F) + colnames(res)=colnames(tab) + } + return(res) +} + nb_obs_PeptideAtlas <- function(input, atlas_file) { ## Calculate the sum of n_observations for each ID in input atlas = read_file(atlas_file, T) return(atlas$nb_obs[match(input,atlas$Uniprot_AC)]) } +#function to create a list of infos from file path +extract_info_from_path <- function(path) { + file_name=strsplit(tail(strsplit(path,"/")[[1]],n=1),"\\.")[[1]][1] + date=tail(strsplit(file_name,"_")[[1]],n=1) + tissue=paste(strsplit(file_name,"_")[[1]][1:2],collapse="_") + return (c(date,tissue,file_name,path)) +} + main = function() { args <- commandArgs(TRUE) if(length(args)<1) { @@ -47,8 +99,8 @@ args <- as.list(as.character(argsDF$V2)) names(args) <- argsDF$V1 - #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda") - #load("/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda") + #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/Get_ms-ms_observations/args.Rda") + #load("/home/dchristiany/proteore_project/ProteoRE/tools/Get_ms-ms_observations/args.Rda") # Extract input input_type = args$input_type @@ -65,18 +117,11 @@ } header = str2bool(args$header) file = read_file(filename, header) + file = one_id_one_line(file,ncol) #only one id per line input = sapply(file[,ncol],function(x) strsplit(as.character(x),";")[[1]][1],USE.NAMES = F) } - + output = args$output - - #function to create a list of infos from file path - extract_info_from_path <- function(path) { - file_name=strsplit(tail(strsplit(path,"/")[[1]],n=1),"\\.")[[1]][1] - date=tail(strsplit(file_name,"_")[[1]],n=1) - tissue=paste(strsplit(file_name,"_")[[1]][1:2],collapse="_") - return (c(date,tissue,file_name,path)) - } #data_frame building paths=strsplit(args$atlas,",")[[1]] @@ -86,7 +131,6 @@ # Annotations res = sapply(df$path, function(x) nb_obs_PeptideAtlas(input, x), USE.NAMES = FALSE) - colnames(res)=df$filename # Write output
--- a/Get_ms-ms_observations.xml Wed Mar 13 09:34:37 2019 -0400 +++ b/Get_ms-ms_observations.xml Thu May 02 05:08:06 2019 -0400 @@ -1,4 +1,4 @@ -<tool id="retr_pepatlas1" name="Get MS/MS observations in tissue/fluid" version="2019.02.26"> +<tool id="retr_pepatlas1" name="Get MS/MS observations in tissue/fluid" version="2019.05.01"> <description>[Peptide Atlas]</description> <requirements> <requirement type="package" version="3.4.1">R</requirement>