0
+ − 1 # Load necessary libraries
+ − 2 library("org.Hs.eg.db", quietly=TRUE)
+ − 3 library("goProfiles", quietly=TRUE)
+ − 4
+ − 5 # Read file and return file content as data.frame?
+ − 6 readfile = function(filename, header) {
+ − 7 if (header == "true") {
+ − 8 # Read only the first two lines of the files as data (without headers):
+ − 9 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
+ − 10 #print("header")
+ − 11 #print(headers)
+ − 12 # Create the headers names with the two (or more) first rows, sappy allows to make operations over the columns (in this case paste) - read more about sapply here :
+ − 13 #headers_names <- sapply(headers, paste, collapse = "_")
+ − 14 #print(headers_names)
+ − 15 #Read the data of the files (skipping the first 2 rows):
+ − 16 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
+ − 17 #print(file[1,])
+ − 18 #And assign the headers of step two to the data:
+ − 19 names(file) <- headers
+ − 20 }
+ − 21 else {
+ − 22 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
+ − 23 }
+ − 24 return(file)
+ − 25 }
+ − 26
+ − 27 #filename = "/Users/LinCun/Documents/ProteoRE/usecase1/Check/HPA.Selection.134.txt"
+ − 28 #test = readfile(filename)
+ − 29 #str(test)
+ − 30 #str(test$Gene.names)
+ − 31 getprofile = function(ids, id_type, level) {
+ − 32
+ − 33 # Check if level is number
+ − 34 if (! as.numeric(level) %% 1 == 0) {
+ − 35 stop("Please enter an integer for level")
+ − 36 }
+ − 37 else {
+ − 38 level = as.numeric(level)
+ − 39 }
+ − 40 #genes = as.vector(file[,ncol])
+ − 41
+ − 42 # Extract Gene Entrez ID
+ − 43 if (id_type == "Entrez") {
+ − 44 id = select(org.Hs.eg.db, ids, "ENTREZID", multiVals = "first")
+ − 45 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
+ − 46 }
+ − 47 else {
+ − 48 genes_ids = c()
+ − 49 id = select(org.Hs.eg.db, ids, "ENTREZID", "UNIPROT", multiVals = "first")
+ − 50 #print(id[[1]][1])
+ − 51 genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
+ − 52 # IDs that have NA ENTREZID
+ − 53 NAs = id$UNIPROT[which(is.na(id$ENTREZID))]
+ − 54 print("IDs unable to convert to ENTREZID: ")
+ − 55 print(NAs)
+ − 56 }
+ − 57 #print(genes_ids)
+ − 58 # Convert Protein IDs into entrez ids
+ − 59
+ − 60 # for (i in 1:length(id$UNIPROT)) {
+ − 61 # print(i)
+ − 62 # if (is.na(id[[2]][i])) {
+ − 63 # print(id[[2]][i])
+ − 64 # }
+ − 65 # }
+ − 66 # a = id[which(id$ENTREZID == "NA"),]
+ − 67 # print(a)
+ − 68 # print(a$UNIPROT)
+ − 69 #print(id[[1]][which(is.na(id$ENTREZID))])
+ − 70 #print(genes_ids)
+ − 71 # for (gene in genes) {
+ − 72 # #id = as.character(mget(gene, org.Hs.egALIAS2EG, ifnotfound = NA))
+ − 73 # id = select(org.Hs.eg.db, genes, "ENTREZID", "UNIPROT")
+ − 74 # print(id)
+ − 75 # genes_ids = append(genes_ids, id$ENTREZID)
+ − 76 # }
+ − 77 #print(genes_ids)
+ − 78
+ − 79 # Create basic profiles
+ − 80 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
+ − 81 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
+ − 82 profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
+ − 83 profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
+ − 84
+ − 85 # Print profile
+ − 86 # printProfiles(profile)
+ − 87
+ − 88 return(c(profile.CC, profile.MF, profile.BP, profile.ALL))
+ − 89 }
+ − 90
+ − 91 # Plot profiles to PNG
+ − 92 plotPNG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
+ − 93 if (!is.null(profile.CC)) {
+ − 94 png("profile.CC.png")
+ − 95 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
+ − 96 dev.off()
+ − 97 }
+ − 98 if (!is.null(profile.BP)) {
+ − 99 png("profile.BP.png")
+ − 100 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
+ − 101 dev.off()
+ − 102 }
+ − 103 if (!is.null(profile.MF)) {
+ − 104 png("profile.MF.png")
+ − 105 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
+ − 106 dev.off()
+ − 107 }
+ − 108 if (!is.null(profile.ALL)) {
+ − 109 png("profile.ALL.png")
+ − 110 plotProfiles(profile.ALL, percentage=per, multiplePlots=T, aTitle=title)
+ − 111 dev.off()
+ − 112 }
+ − 113 }
+ − 114
+ − 115 # Plot profiles to JPEG
+ − 116 plotJPEG = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
+ − 117 if (!is.null(profile.CC)) {
+ − 118 jpeg("profile.CC.jpeg")
+ − 119 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
+ − 120 dev.off()
+ − 121 }
+ − 122 if (!is.null(profile.BP)) {
+ − 123 jpeg("profile.BP.jpeg")
+ − 124 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
+ − 125 dev.off()
+ − 126 }
+ − 127 if (!is.null(profile.MF)) {
+ − 128 jpeg("profile.MF.jpeg")
+ − 129 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
+ − 130 dev.off()
+ − 131 }
+ − 132 if (!is.null(profile.ALL)) {
+ − 133 jpeg("profile.ALL.jpeg")
+ − 134 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)
+ − 135 dev.off()
+ − 136 }
+ − 137 }
+ − 138
+ − 139 # Plot profiles to PDF
+ − 140 plotPDF = function(profile.CC = NULL, profile.BP = NULL, profile.MF = NULL, profile.ALL = NULL, per = TRUE, title = TRUE) {
+ − 141 if (!is.null(profile.CC)) {
+ − 142 pdf("profile.CC.pdf")
+ − 143 plotProfiles(profile.CC, percentage=per, multiplePlots=FALSE, aTitle=title)
+ − 144 dev.off()
+ − 145 }
+ − 146 if (!is.null(profile.BP)) {
+ − 147 pdf("profile.BP.pdf")
+ − 148 plotProfiles(profile.BP, percentage=per, multiplePlots=FALSE, aTitle=title)
+ − 149 dev.off()
+ − 150 }
+ − 151 if (!is.null(profile.MF)) {
+ − 152 pdf("profile.MF.pdf")
+ − 153 plotProfiles(profile.MF, percentage=per, multiplePlots=FALSE, aTitle=title)
+ − 154 dev.off()
+ − 155 }
+ − 156 if (!is.null(profile.ALL)) {
+ − 157 #print("all")
+ − 158 pdf("profile.ALL.pdf")
+ − 159 plotProfiles(profile.ALL, percentage=per, multiplePlots=FALSE, aTitle=title)
+ − 160 dev.off()
+ − 161 }
+ − 162 }
+ − 163
+ − 164 goprofiles = function() {
+ − 165 args = commandArgs(trailingOnly = TRUE)
+ − 166 #print(args)
+ − 167 # arguments: filename.R inputfile ncol "CC,MF,BP,ALL" "PNG,JPEG,PDF" level "TRUE"(percentage) "Title"
+ − 168 if (length(args) != 8) {
+ − 169 stop("Not enough/Too many arguments", call. = FALSE)
+ − 170 }
+ − 171 else {
+ − 172 input_type = args[2]
+ − 173 if (input_type == "text") {
+ − 174 input = strsplit(args[1], "\\s+")[[1]]
+ − 175 }
+ − 176 else if (input_type == "file") {
+ − 177 filename = strsplit(args[1], ",")[[1]][1]
+ − 178 ncol = strsplit(args[1], ",")[[1]][2]
+ − 179 # Check ncol
+ − 180 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
+ − 181 stop("Please enter an integer for level")
+ − 182 }
+ − 183 else {
+ − 184 ncol = as.numeric(gsub("c", "", ncol))
+ − 185 }
+ − 186 header = strsplit(args[1], ",")[[1]][3]
+ − 187 # Get file content
+ − 188 file = readfile(filename, header)
+ − 189 # Extract Protein IDs list
+ − 190 input = c()
+ − 191 for (row in as.character(file[,ncol])) {
+ − 192 input = c(input, strsplit(row, ";")[[1]][1])
+ − 193 }
+ − 194 }
+ − 195 id_type = args[3]
+ − 196 ontoopt = strsplit(args[4], ",")[[1]]
+ − 197 #print(ontoopt)
+ − 198 #plotopt = strsplit(args[3], ",")
+ − 199 plotopt = args[5]
+ − 200 level = args[6]
+ − 201 per = as.logical(args[7])
+ − 202 title = args[8]
+ − 203
+ − 204 profiles = getprofile(input, id_type, level)
+ − 205 profile.CC = profiles[1]
+ − 206 #print(profile.CC)
+ − 207 profile.MF = profiles[2]
+ − 208 #print(profile.MF)
+ − 209 profile.BP = profiles[3]
+ − 210 #print(profile.BP)
+ − 211 profile.ALL = profiles[-3:-1]
+ − 212 #print(profile.ALL)
+ − 213 #c(profile.ALL, profile.CC, profile.MF, profile.BP)
+ − 214 if ("CC" %in% ontoopt) {
+ − 215 if (grepl("PNG", plotopt)) {
+ − 216 plotPNG(profile.CC=profile.CC, per=per, title=title)
+ − 217 }
+ − 218 if (grepl("JPEG", plotopt)) {
+ − 219 plotJPEG(profile.CC = profile.CC, per=per, title=title)
+ − 220 }
+ − 221 if (grepl("PDF", plotopt)) {
+ − 222 plotPDF(profile.CC = profile.CC, per=per, title=title)
+ − 223 }
+ − 224 }
+ − 225 if ("MF" %in% ontoopt) {
+ − 226 if (grepl("PNG", plotopt)) {
+ − 227 plotPNG(profile.MF = profile.MF, per=per, title=title)
+ − 228 }
+ − 229 if (grepl("JPEG", plotopt)) {
+ − 230 plotJPEG(profile.MF = profile.MF, per=per, title=title)
+ − 231 }
+ − 232 if (grepl("PDF", plotopt)) {
+ − 233 plotPDF(profile.MF = profile.MF, per=per, title=title)
+ − 234 }
+ − 235 }
+ − 236 if ("BP" %in% ontoopt) {
+ − 237 if (grepl("PNG", plotopt)) {
+ − 238 plotPNG(profile.BP = profile.BP, per=per, title=title)
+ − 239 }
+ − 240 if (grepl("JPEG", plotopt)) {
+ − 241 plotJPEG(profile.BP = profile.BP, per=per, title=title)
+ − 242 }
+ − 243 if (grepl("PDF", plotopt)) {
+ − 244 plotPDF(profile.BP = profile.BP, per=per, title=title)
+ − 245 }
+ − 246 }
+ − 247
+ − 248 #if (grepl("PNG", plotopt)) {
+ − 249 # plotPNG(profile.ALL = profile.ALL, per=per, title=title)
+ − 250 #}
+ − 251 #if (grepl("JPEG", plotopt)) {
+ − 252 # plotJPEG(profile.ALL = profile.ALL, per=per, title=title)
+ − 253 #}
+ − 254 #if (grepl("PDF", plotopt)) {
+ − 255 # plotPDF(profile.ALL = profile.ALL, per=per, title=title)
+ − 256 #}
+ − 257 }
+ − 258
+ − 259 }
+ − 260
+ − 261 goprofiles()
+ − 262
+ − 263 #Rscript go.R ../proteinGroups_Maud.txt "1" "CC" "PDF" 2 "TRUE" "Title"