diff add_protein_features.R @ 18:0a9ae3d7dbf2 draft default tip

"planemo upload commit 7afd4b3ee25f024257ccbac6e51076d25b2a04e7"
author proteore
date Thu, 20 Aug 2020 03:09:52 -0400
parents 7caa90759aba
children
line wrap: on
line diff
--- a/add_protein_features.R	Fri Jan 24 05:04:15 2020 -0500
+++ b/add_protein_features.R	Thu Aug 20 03:09:52 2020 -0400
@@ -48,9 +48,7 @@
         --column: the column number which you would like to apply...
         --header: true/false if your file contains a header
         --type: the type of input IDs (Uniprot_AC/EntrezID)
-        --pc_features: IsoPoint,SeqLength,MW
-        --localization: Chr,SubcellLocations
-        --diseases_info: Diseases
+        --pc_features: IsoPoint,SeqLength,MW,Chr,SubcellLocations,Diseases,protein_name,function,post_trans_mod,protein_family,pathway
         --output: text output filename \n")
     
     q(save="no")
@@ -122,14 +120,9 @@
 
 # Get information from neXtProt
 get_nextprot_info <- function(nextprot,input,pc_features,localization,diseases_info){
-  if(diseases_info){
-    cols = c("NextprotID",pc_features,localization,"Diseases")
-  } else {
-    cols = c("NextprotID",pc_features,localization)
-  }
-  
+  cols = c("NextprotID",pc_features) # all requested columns now come from pc_features; localization/diseases_info are no longer read here
   cols=cols[cols!="None"]
-  info = nextprot[match(input,nextprot$NextprotID),cols]
+  info = nextprot[match(input,nextprot$NextprotID),intersect(colnames(nextprot),cols),drop=FALSE] # drop=FALSE: stay a data.frame even if only NextprotID is selected, so res$NextprotID and merge() keep working
   return(info)
 }
 
@@ -172,8 +165,6 @@
   # Parse arguments
   id_type = args$type
   pc_features = strsplit(args$pc_features, ",")[[1]]
-  localization = strsplit(args$localization, ",")[[1]]
-  diseases_info = str2bool(args$diseases_info)
   output = args$output
 
   # Change the sample ids if they are Uniprot_AC ids to be able to match them with
@@ -191,7 +182,7 @@
   if (all(!NextprotID %in% nextprot[,1])){
     write.table("None of the input ids can be found in Nextprot",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE)
   } else {
-    res <- get_nextprot_info(nextprot,NextprotID,pc_features,localization,diseases_info)
+    res <- get_nextprot_info(nextprot,NextprotID,pc_features)
     res = res[!duplicated(res$NextprotID),]
     output_content = merge(file, res,by.x=ncol,by.y="NextprotID",incomparables = NA,all.x=T)
     output_content = order_columns(output_content,ncol,id_type,file)