comparison add_protein_features.R @ 18:0a9ae3d7dbf2 draft default tip

"planemo upload commit 7afd4b3ee25f024257ccbac6e51076d25b2a04e7"
author proteore
date Thu, 20 Aug 2020 03:09:52 -0400
parents 7caa90759aba
children
comparison
equal deleted inserted replaced
17:2952bae8a1ea 18:0a9ae3d7dbf2
46 --input: input 46 --input: input
47 --nextprot: path to nextprot information file 47 --nextprot: path to nextprot information file
48 --column: the column number which you would like to apply... 48 --column: the column number which you would like to apply...
49 --header: true/false if your file contains a header 49 --header: true/false if your file contains a header
50 --type: the type of input IDs (Uniprot_AC/EntrezID) 50 --type: the type of input IDs (Uniprot_AC/EntrezID)
51 --pc_features: IsoPoint,SeqLength,MW 51 --pc_features: IsoPoint,SeqLength,MW,Chr,SubcellLocations,Diseases,protein_name,function,post_trans_mod,protein_family,pathway
52 --localization: Chr,SubcellLocations
53 --diseases_info: Diseases
54 --output: text output filename \n") 52 --output: text output filename \n")
55 53
56 q(save="no") 54 q(save="no")
57 } 55 }
58 56
120 return(res) 118 return(res)
121 } 119 }
122 120
123 # Get information from neXtProt 121 # Get information from neXtProt
124 get_nextprot_info <- function(nextprot,input,pc_features,localization,diseases_info){ 122 get_nextprot_info <- function(nextprot,input,pc_features,localization,diseases_info){
125 if(diseases_info){ 123 cols = c("NextprotID",pc_features)
126 cols = c("NextprotID",pc_features,localization,"Diseases")
127 } else {
128 cols = c("NextprotID",pc_features,localization)
129 }
130
131 cols=cols[cols!="None"] 124 cols=cols[cols!="None"]
132 info = nextprot[match(input,nextprot$NextprotID),cols] 125 info = nextprot[match(input,nextprot$NextprotID),intersect(colnames(nextprot),cols)]
133 return(info) 126 return(info)
134 } 127 }
135 128
136 protein_features = function() { 129 protein_features = function() {
137 130
170 nextprot = read_file(args$nextprot,T) 163 nextprot = read_file(args$nextprot,T)
171 164
172 # Parse arguments 165 # Parse arguments
173 id_type = args$type 166 id_type = args$type
174 pc_features = strsplit(args$pc_features, ",")[[1]] 167 pc_features = strsplit(args$pc_features, ",")[[1]]
175 localization = strsplit(args$localization, ",")[[1]]
176 diseases_info = str2bool(args$diseases_info)
177 output = args$output 168 output = args$output
178 169
179 # Change the sample ids if they are Uniprot_AC ids to be able to match them with 170 # Change the sample ids if they are Uniprot_AC ids to be able to match them with
180 # Nextprot data 171 # Nextprot data
181 if (id_type=="Uniprot_AC"){ 172 if (id_type=="Uniprot_AC"){
189 #Select user input protein ids in nextprot 180 #Select user input protein ids in nextprot
190 #NextprotID = unique(NextprotID[which(!is.na(NextprotID[NextprotID!=""]))]) 181 #NextprotID = unique(NextprotID[which(!is.na(NextprotID[NextprotID!=""]))])
191 if (all(!NextprotID %in% nextprot[,1])){ 182 if (all(!NextprotID %in% nextprot[,1])){
192 write.table("None of the input ids can be found in Nextprot",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) 183 write.table("None of the input ids can be found in Nextprot",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE)
193 } else { 184 } else {
194 res <- get_nextprot_info(nextprot,NextprotID,pc_features,localization,diseases_info) 185 res <- get_nextprot_info(nextprot,NextprotID,pc_features)
195 res = res[!duplicated(res$NextprotID),] 186 res = res[!duplicated(res$NextprotID),]
196 output_content = merge(file, res,by.x=ncol,by.y="NextprotID",incomparables = NA,all.x=T) 187 output_content = merge(file, res,by.x=ncol,by.y="NextprotID",incomparables = NA,all.x=T)
197 output_content = order_columns(output_content,ncol,id_type,file) 188 output_content = order_columns(output_content,ncol,id_type,file)
198 if (id_type=="Uniprot_AC"){output_content = output_content[,-which(colnames(output_content)=="NextprotID")]} #remove nextprotID column 189 if (id_type=="Uniprot_AC"){output_content = output_content[,-which(colnames(output_content)=="NextprotID")]} #remove nextprotID column
199 output_content <- as.data.frame(apply(output_content, c(1,2), function(x) gsub("^$|^ $", NA, x))) #convert "" et " " to NA 190 output_content <- as.data.frame(apply(output_content, c(1,2), function(x) gsub("^$|^ $", NA, x))) #convert "" et " " to NA