Mercurial > repos > lnguyen > proteore_goprofiles
comparison goprofiles.R @ 0:6eeb2fb0c4bd draft default tip
planemo upload
| author | lnguyen |
|---|---|
| date | Sat, 16 Sep 2017 09:17:07 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:6eeb2fb0c4bd |
|---|---|
| 1 # Load necessary libraries | |
| 2 library("org.Hs.eg.db", quietly=TRUE) | |
| 3 library("goProfiles", quietly=TRUE) | |
| 4 | |
| 5 # Read file and return file content as data.frame? | |
| 6 readfile = function(filename, header) { | |
| 7 if (header == "true") { | |
| 8 # Read only the first two lines of the files as data (without headers): | |
| 9 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) | |
| 10 #print("header") | |
| 11 #print(headers) | |
| 12 # Create the headers names with the two (or more) first rows, sappy allows to make operations over the columns (in this case paste) - read more about sapply here : | |
| 13 #headers_names <- sapply(headers, paste, collapse = "_") | |
| 14 #print(headers_names) | |
| 15 #Read the data of the files (skipping the first 2 rows): | |
| 16 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) | |
| 17 #print(file[1,]) | |
| 18 #And assign the headers of step two to the data: | |
| 19 names(file) <- headers | |
| 20 } | |
| 21 else { | |
| 22 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) | |
| 23 } | |
| 24 return(file) | |
| 25 } | |
| 26 | |
| 27 #filename = "/Users/LinCun/Documents/ProteoRE/usecase1/Check/HPA.Selection.134.txt" | |
| 28 #test = readfile(filename) | |
| 29 #str(test) | |
| 30 #str(test$Gene.names) | |
| 31 getprofile = function(ids, id_type, level) { | |
| 32 | |
| 33 # Check if level is number | |
| 34 if (! as.numeric(level) %% 1 == 0) { | |
| 35 stop("Please enter an integer for level") | |
| 36 } | |
| 37 else { | |
| 38 level = as.numeric(level) | |
| 39 } | |
| 40 #genes = as.vector(file[,ncol]) | |
| 41 | |
| 42 # Extract Gene Entrez ID | |
| 43 if (id_type == "Entrez") { | |
| 44 id = select(org.Hs.eg.db, ids, "ENTREZID", multiVals = "first") | |
| 45 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] | |
| 46 } | |
| 47 else { | |
| 48 genes_ids = c() | |
| 49 id = select(org.Hs.eg.db, ids, "ENTREZID", "UNIPROT", multiVals = "first") | |
| 50 #print(id[[1]][1]) | |
| 51 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))] | |
| 52 # IDs that have NA ENTREZID | |
| 53 NAs = id$UNIPROT[which(is.na(id$ENTREZID))] | |
| 54 print("IDs unable to convert to ENTREZID: ") | |
| 55 print(NAs) | |
| 56 } | |
| 57 #print(genes_ids) | |
| 58 # Convert Protein IDs into entrez ids | |
| 59 | |
| 60 # for (i in 1:length(id$UNIPROT)) { | |
| 61 # print(i) | |
| 62 # if (is.na(id[[2]][i])) { | |
| 63 # print(id[[2]][i]) | |
| 64 # } | |
| 65 # } | |
| 66 # a = id[which(id$ENTREZID == "NA"),] | |
| 67 # print(a) | |
| 68 # print(a$UNIPROT) | |
| 69 #print(id[[1]][which(is.na(id$ENTREZID))]) | |
| 70 #print(genes_ids) | |
| 71 # for (gene in genes) { | |
| 72 # #id = as.character(mget(gene, org.Hs.egALIAS2EG, ifnotfound = NA)) | |
| 73 # id = select(org.Hs.eg.db, genes, "ENTREZID", "UNIPROT") | |
| 74 # print(id) | |
| 75 # genes_ids = append(genes_ids, id$ENTREZID) | |
| 76 # } | |
| 77 #print(genes_ids) | |
| 78 | |
| 79 # Create basic profiles | |
| 80 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | |
| 81 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | |
| 82 profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | |
| 83 profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) | |
| 84 | |
| 85 # Print profile | |
| 86 # printProfiles(profile) | |
| 87 | |
| 88 return(c(profile.CC, profile.MF, profile.BP, profile.ALL)) | |
| 89 } | |
| 90 | |
| 91 # Plot profiles to PNG | |
| 92 plotPNG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { | |
| 93 if (!is.null(profile.CC)) { | |
| 94 png("profile.CC.png") | |
| 95 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) | |
| 96 dev.off() | |
| 97 } | |
| 98 if (!is.null(profile.BP)) { | |
| 99 png("profile.BP.png") | |
| 100 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) | |
| 101 dev.off() | |
| 102 } | |
| 103 if (!is.null(profile.MF)) { | |
| 104 png("profile.MF.png") | |
| 105 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) | |
| 106 dev.off() | |
| 107 } | |
| 108 if (!is.null(profile.ALL)) { | |
| 109 png("profile.ALL.png") | |
| 110 plotProfiles(profile.ALL, percentage=per, multiplePlots=T, aTitle=title) | |
| 111 dev.off() | |
| 112 } | |
| 113 } | |
| 114 | |
| 115 # Plot profiles to JPEG | |
| 116 plotJPEG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { | |
| 117 if (!is.null(profile.CC)) { | |
| 118 jpeg("profile.CC.jpeg") | |
| 119 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) | |
| 120 dev.off() | |
| 121 } | |
| 122 if (!is.null(profile.BP)) { | |
| 123 jpeg("profile.BP.jpeg") | |
| 124 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) | |
| 125 dev.off() | |
| 126 } | |
| 127 if (!is.null(profile.MF)) { | |
| 128 jpeg("profile.MF.jpeg") | |
| 129 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) | |
| 130 dev.off() | |
| 131 } | |
| 132 if (!is.null(profile.ALL)) { | |
| 133 jpeg("profile.ALL.jpeg") | |
| 134 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title) | |
| 135 dev.off() | |
| 136 } | |
| 137 } | |
| 138 | |
| 139 # Plot profiles to PDF | |
| 140 plotPDF = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) { | |
| 141 if (!is.null(profile.CC)) { | |
| 142 pdf("profile.CC.pdf") | |
| 143 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title) | |
| 144 dev.off() | |
| 145 } | |
| 146 if (!is.null(profile.BP)) { | |
| 147 pdf("profile.BP.pdf") | |
| 148 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title) | |
| 149 dev.off() | |
| 150 } | |
| 151 if (!is.null(profile.MF)) { | |
| 152 pdf("profile.MF.pdf") | |
| 153 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title) | |
| 154 dev.off() | |
| 155 } | |
| 156 if (!is.null(profile.ALL)) { | |
| 157 #print("all") | |
| 158 pdf("profile.ALL.pdf") | |
| 159 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title) | |
| 160 dev.off() | |
| 161 } | |
| 162 } | |
| 163 | |
| 164 goprofiles = function() { | |
| 165 args = commandArgs(trailingOnly = TRUE) | |
| 166 #print(args) | |
| 167 # arguments: filename.R inputfile ncol "CC,MF,BP,ALL" "PNG,JPEG,PDF" level "TRUE"(percentage) "Title" | |
| 168 if (length(args) != 8) { | |
| 169 stop("Not enough/Too many arguments", call. = FALSE) | |
| 170 } | |
| 171 else { | |
| 172 input_type = args[2] | |
| 173 if (input_type == "text") { | |
| 174 input = strsplit(args[1], "\\s+")[[1]] | |
| 175 } | |
| 176 else if (input_type == "file") { | |
| 177 filename = strsplit(args[1], ",")[[1]][1] | |
| 178 ncol = strsplit(args[1], ",")[[1]][2] | |
| 179 # Check ncol | |
| 180 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { | |
| 181 stop("Please enter an integer for level") | |
| 182 } | |
| 183 else { | |
| 184 ncol = as.numeric(gsub("c", "", ncol)) | |
| 185 } | |
| 186 header = strsplit(args[1], ",")[[1]][3] | |
| 187 # Get file content | |
| 188 file = readfile(filename, header) | |
| 189 # Extract Protein IDs list | |
| 190 input = c() | |
| 191 for (row in as.character(file[,ncol])) { | |
| 192 input = c(input, strsplit(row, ";")[[1]][1]) | |
| 193 } | |
| 194 } | |
| 195 id_type = args[3] | |
| 196 ontoopt = strsplit(args[4], ",")[[1]] | |
| 197 #print(ontoopt) | |
| 198 #plotopt = strsplit(args[3], ",") | |
| 199 plotopt = args[5] | |
| 200 level = args[6] | |
| 201 per = as.logical(args[7]) | |
| 202 title = args[8] | |
| 203 | |
| 204 profiles = getprofile(input, id_type, level) | |
| 205 profile.CC = profiles[1] | |
| 206 #print(profile.CC) | |
| 207 profile.MF = profiles[2] | |
| 208 #print(profile.MF) | |
| 209 profile.BP = profiles[3] | |
| 210 #print(profile.BP) | |
| 211 profile.ALL = profiles[-3:-1] | |
| 212 #print(profile.ALL) | |
| 213 #c(profile.ALL, profile.CC, profile.MF, profile.BP) | |
| 214 if ("CC" %in% ontoopt) { | |
| 215 if (grepl("PNG", plotopt)) { | |
| 216 plotPNG(profile.CC=profile.CC, per=per, title=title) | |
| 217 } | |
| 218 if (grepl("JPEG", plotopt)) { | |
| 219 plotJPEG(profile.CC = profile.CC, per=per, title=title) | |
| 220 } | |
| 221 if (grepl("PDF", plotopt)) { | |
| 222 plotPDF(profile.CC = profile.CC, per=per, title=title) | |
| 223 } | |
| 224 } | |
| 225 if ("MF" %in% ontoopt) { | |
| 226 if (grepl("PNG", plotopt)) { | |
| 227 plotPNG(profile.MF = profile.MF, per=per, title=title) | |
| 228 } | |
| 229 if (grepl("JPEG", plotopt)) { | |
| 230 plotJPEG(profile.MF = profile.MF, per=per, title=title) | |
| 231 } | |
| 232 if (grepl("PDF", plotopt)) { | |
| 233 plotPDF(profile.MF = profile.MF, per=per, title=title) | |
| 234 } | |
| 235 } | |
| 236 if ("BP" %in% ontoopt) { | |
| 237 if (grepl("PNG", plotopt)) { | |
| 238 plotPNG(profile.BP = profile.BP, per=per, title=title) | |
| 239 } | |
| 240 if (grepl("JPEG", plotopt)) { | |
| 241 plotJPEG(profile.BP = profile.BP, per=per, title=title) | |
| 242 } | |
| 243 if (grepl("PDF", plotopt)) { | |
| 244 plotPDF(profile.BP = profile.BP, per=per, title=title) | |
| 245 } | |
| 246 } | |
| 247 | |
| 248 #if (grepl("PNG", plotopt)) { | |
| 249 # plotPNG(profile.ALL = profile.ALL, per=per, title=title) | |
| 250 #} | |
| 251 #if (grepl("JPEG", plotopt)) { | |
| 252 # plotJPEG(profile.ALL = profile.ALL, per=per, title=title) | |
| 253 #} | |
| 254 #if (grepl("PDF", plotopt)) { | |
| 255 # plotPDF(profile.ALL = profile.ALL, per=per, title=title) | |
| 256 #} | |
| 257 } | |
| 258 | |
| 259 } | |
| 260 | |
| 261 goprofiles() | |
| 262 | |
| 263 #Rscript go.R ../proteinGroups_Maud.txt "1" "CC" "PDF" 2 "TRUE" "Title" |
