Mercurial > repos > proteore > proteore_prot_features
comparison add_protein_features.R @ 18:0a9ae3d7dbf2 draft default tip
"planemo upload commit 7afd4b3ee25f024257ccbac6e51076d25b2a04e7"
| author | proteore |
|---|---|
| date | Thu, 20 Aug 2020 03:09:52 -0400 |
| parents | 7caa90759aba |
| children | |
comparison
equal
deleted
inserted
replaced
| 17:2952bae8a1ea | 18:0a9ae3d7dbf2 |
|---|---|
| 46 --input: input | 46 --input: input |
| 47 --nextprot: path to nextprot information file | 47 --nextprot: path to nextprot information file |
| 48 --column: the column number which you would like to apply... | 48 --column: the column number which you would like to apply... |
| 49 --header: true/false if your file contains a header | 49 --header: true/false if your file contains a header |
| 50 --type: the type of input IDs (Uniprot_AC/EntrezID) | 50 --type: the type of input IDs (Uniprot_AC/EntrezID) |
| 51 --pc_features: IsoPoint,SeqLength,MW | 51 --pc_features: IsoPoint,SeqLength,MW,Chr,SubcellLocations,Diseases,protein_name,function,post_trans_mod,protein_family,pathway |
| 52 --localization: Chr,SubcellLocations | |
| 53 --diseases_info: Diseases | |
| 54 --output: text output filename \n") | 52 --output: text output filename \n") |
| 55 | 53 |
| 56 q(save="no") | 54 q(save="no") |
| 57 } | 55 } |
| 58 | 56 |
| 120 return(res) | 118 return(res) |
| 121 } | 119 } |
| 122 | 120 |
| 123 # Get information from neXtProt | 121 # Get information from neXtProt |
| 124 get_nextprot_info <- function(nextprot,input,pc_features,localization,diseases_info){ | 122 get_nextprot_info <- function(nextprot,input,pc_features,localization,diseases_info){ |
| 125 if(diseases_info){ | 123 cols = c("NextprotID",pc_features) |
| 126 cols = c("NextprotID",pc_features,localization,"Diseases") | |
| 127 } else { | |
| 128 cols = c("NextprotID",pc_features,localization) | |
| 129 } | |
| 130 | |
| 131 cols=cols[cols!="None"] | 124 cols=cols[cols!="None"] |
| 132 info = nextprot[match(input,nextprot$NextprotID),cols] | 125 info = nextprot[match(input,nextprot$NextprotID),intersect(colnames(nextprot),cols)] |
| 133 return(info) | 126 return(info) |
| 134 } | 127 } |
| 135 | 128 |
| 136 protein_features = function() { | 129 protein_features = function() { |
| 137 | 130 |
| 170 nextprot = read_file(args$nextprot,T) | 163 nextprot = read_file(args$nextprot,T) |
| 171 | 164 |
| 172 # Parse arguments | 165 # Parse arguments |
| 173 id_type = args$type | 166 id_type = args$type |
| 174 pc_features = strsplit(args$pc_features, ",")[[1]] | 167 pc_features = strsplit(args$pc_features, ",")[[1]] |
| 175 localization = strsplit(args$localization, ",")[[1]] | |
| 176 diseases_info = str2bool(args$diseases_info) | |
| 177 output = args$output | 168 output = args$output |
| 178 | 169 |
| 179 # Change the sample ids if they are Uniprot_AC ids to be able to match them with | 170 # Change the sample ids if they are Uniprot_AC ids to be able to match them with |
| 180 # Nextprot data | 171 # Nextprot data |
| 181 if (id_type=="Uniprot_AC"){ | 172 if (id_type=="Uniprot_AC"){ |
| 189 #Select user input protein ids in nextprot | 180 #Select user input protein ids in nextprot |
| 190 #NextprotID = unique(NextprotID[which(!is.na(NextprotID[NextprotID!=""]))]) | 181 #NextprotID = unique(NextprotID[which(!is.na(NextprotID[NextprotID!=""]))]) |
| 191 if (all(!NextprotID %in% nextprot[,1])){ | 182 if (all(!NextprotID %in% nextprot[,1])){ |
| 192 write.table("None of the input ids can be found in Nextprot",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) | 183 write.table("None of the input ids can be found in Nextprot",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) |
| 193 } else { | 184 } else { |
| 194 res <- get_nextprot_info(nextprot,NextprotID,pc_features,localization,diseases_info) | 185 res <- get_nextprot_info(nextprot,NextprotID,pc_features) |
| 195 res = res[!duplicated(res$NextprotID),] | 186 res = res[!duplicated(res$NextprotID),] |
| 196 output_content = merge(file, res,by.x=ncol,by.y="NextprotID",incomparables = NA,all.x=T) | 187 output_content = merge(file, res,by.x=ncol,by.y="NextprotID",incomparables = NA,all.x=T) |
| 197 output_content = order_columns(output_content,ncol,id_type,file) | 188 output_content = order_columns(output_content,ncol,id_type,file) |
| 198 if (id_type=="Uniprot_AC"){output_content = output_content[,-which(colnames(output_content)=="NextprotID")]} #remove nextprotID column | 189 if (id_type=="Uniprot_AC"){output_content = output_content[,-which(colnames(output_content)=="NextprotID")]} #remove nextprotID column |
| 199 output_content <- as.data.frame(apply(output_content, c(1,2), function(x) gsub("^$|^ $", NA, x))) #convert "" et " " to NA | 190 output_content <- as.data.frame(apply(output_content, c(1,2), function(x) gsub("^$|^ $", NA, x))) #convert "" et " " to NA |
