Mercurial > repos > proteore > proteore_prot_features
comparison add_protein_features.R @ 18:0a9ae3d7dbf2 draft default tip
"planemo upload commit 7afd4b3ee25f024257ccbac6e51076d25b2a04e7"
author | proteore |
---|---|
date | Thu, 20 Aug 2020 03:09:52 -0400 |
parents | 7caa90759aba |
children |
comparison legend: equal | deleted | inserted | replaced
17:2952bae8a1ea | 18:0a9ae3d7dbf2 |
---|---|
46 --input: input | 46 --input: input |
47 --nextprot: path to nextprot information file | 47 --nextprot: path to nextprot information file |
48 --column: the column number which you would like to apply... | 48 --column: the column number which you would like to apply... |
49 --header: true/false if your file contains a header | 49 --header: true/false if your file contains a header |
50 --type: the type of input IDs (Uniprot_AC/EntrezID) | 50 --type: the type of input IDs (Uniprot_AC/EntrezID) |
51 --pc_features: IsoPoint,SeqLength,MW | 51 --pc_features: IsoPoint,SeqLength,MW,Chr,SubcellLocations,Diseases,protein_name,function,post_trans_mod,protein_family,pathway |
52 --localization: Chr,SubcellLocations | |
53 --diseases_info: Diseases | |
54 --output: text output filename \n") | 52 --output: text output filename \n") |
55 | 53 |
56 q(save="no") | 54 q(save="no") |
57 } | 55 } |
58 | 56 |
120 return(res) | 118 return(res) |
121 } | 119 } |
122 | 120 |
123 # Get information from neXtProt | 121 # Get information from neXtProt |
124 get_nextprot_info <- function(nextprot,input,pc_features,localization,diseases_info){ | 122 get_nextprot_info <- function(nextprot,input,pc_features,localization,diseases_info){ |
125 if(diseases_info){ | 123 cols = c("NextprotID",pc_features) |
126 cols = c("NextprotID",pc_features,localization,"Diseases") | |
127 } else { | |
128 cols = c("NextprotID",pc_features,localization) | |
129 } | |
130 | |
131 cols=cols[cols!="None"] | 124 cols=cols[cols!="None"] |
132 info = nextprot[match(input,nextprot$NextprotID),cols] | 125 info = nextprot[match(input,nextprot$NextprotID),intersect(colnames(nextprot),cols)] |
133 return(info) | 126 return(info) |
134 } | 127 } |
135 | 128 |
136 protein_features = function() { | 129 protein_features = function() { |
137 | 130 |
170 nextprot = read_file(args$nextprot,T) | 163 nextprot = read_file(args$nextprot,T) |
171 | 164 |
172 # Parse arguments | 165 # Parse arguments |
173 id_type = args$type | 166 id_type = args$type |
174 pc_features = strsplit(args$pc_features, ",")[[1]] | 167 pc_features = strsplit(args$pc_features, ",")[[1]] |
175 localization = strsplit(args$localization, ",")[[1]] | |
176 diseases_info = str2bool(args$diseases_info) | |
177 output = args$output | 168 output = args$output |
178 | 169 |
179 # Change the sample ids if they are Uniprot_AC ids to be able to match them with | 170 # Change the sample ids if they are Uniprot_AC ids to be able to match them with |
180 # Nextprot data | 171 # Nextprot data |
181 if (id_type=="Uniprot_AC"){ | 172 if (id_type=="Uniprot_AC"){ |
189 #Select user input protein ids in nextprot | 180 #Select user input protein ids in nextprot |
190 #NextprotID = unique(NextprotID[which(!is.na(NextprotID[NextprotID!=""]))]) | 181 #NextprotID = unique(NextprotID[which(!is.na(NextprotID[NextprotID!=""]))]) |
191 if (all(!NextprotID %in% nextprot[,1])){ | 182 if (all(!NextprotID %in% nextprot[,1])){ |
192 write.table("None of the input ids can be found in Nextprot",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) | 183 write.table("None of the input ids can be found in Nextprot",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) |
193 } else { | 184 } else { |
194 res <- get_nextprot_info(nextprot,NextprotID,pc_features,localization,diseases_info) | 185 res <- get_nextprot_info(nextprot,NextprotID,pc_features) |
195 res = res[!duplicated(res$NextprotID),] | 186 res = res[!duplicated(res$NextprotID),] |
196 output_content = merge(file, res,by.x=ncol,by.y="NextprotID",incomparables = NA,all.x=T) | 187 output_content = merge(file, res,by.x=ncol,by.y="NextprotID",incomparables = NA,all.x=T) |
197 output_content = order_columns(output_content,ncol,id_type,file) | 188 output_content = order_columns(output_content,ncol,id_type,file) |
198 if (id_type=="Uniprot_AC"){output_content = output_content[,-which(colnames(output_content)=="NextprotID")]} #remove nextprotID column | 189 if (id_type=="Uniprot_AC"){output_content = output_content[,-which(colnames(output_content)=="NextprotID")]} #remove nextprotID column |
199 output_content <- as.data.frame(apply(output_content, c(1,2), function(x) gsub("^$|^ $", NA, x))) #convert "" et " " to NA | 190 output_content <- as.data.frame(apply(output_content, c(1,2), function(x) gsub("^$|^ $", NA, x))) #convert "" et " " to NA |