comparison w4m_general_purpose_routines.R @ 2:c415b7dc6f37 draft default tip

planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 3e916537da6bb37e6f3927d7a11e98e0ab6ef5ec
author eschen42
date Mon, 05 Mar 2018 12:40:17 -0500
parents 6ccbe18131a6
children
comparison
equal deleted inserted replaced
1:02cafb660b72 2:c415b7dc6f37
1 ##-----------------------------------------------
2 ## helper functions for error detection/reporting
3 ##-----------------------------------------------
4
5 # ISO 8601 date ref: https://en.wikipedia.org/wiki/ISO_8601
6 iso_date <- function() {
7 format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")
8 }
9
10 # log-printing to stderr
11 log_print <- function(x, ...) {
12 cat(
13 sep=""
14 , file=stderr()
15 , iso_date()
16 , " "
17 , c(x, ...)
18 , "\n"
19 )
20 }
21
22 # format error for logging
23 format_error <- function(e) {
24 paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ")
25 }
26
27 # tryCatchFunc produces a list
28 # func - a function that takes no arguments
29 # On success of func(), tryCatchFunc produces
30 # list(success = TRUE, value = func(), msg = "")
31 # On failure of func(), tryCatchFunc produces
32 # list(success = FALSE, value = NA, msg = "the error message")
33 tryCatchFunc <- function(func) {
34 retval <- NULL
35 tryCatch(
36 expr = {
37 retval <- ( list( success = TRUE, value = func(), msg = "" ) )
38 }
39 , error = function(e) {
40 retval <<- list( success = FALSE, value = NA, msg = format_error(e) )
41 }
42 )
43 return (retval)
44 }
45
1 # prepare.data.matrix - Prepare x.datamatrix for multivariate statistical analaysis (MVA) 46 # prepare.data.matrix - Prepare x.datamatrix for multivariate statistical analaysis (MVA)
2 # - Motivation: 47 # - Motivation:
3 # - Selection: 48 # - Selection:
4 # - You may want to exclude several samples from your analysis: 49 # - You may want to exclude several samples from your analysis:
5 # - If so, set the argument 'exclude.samples' to a vector of sample names 50 # - If so, set the argument 'exclude.samples' to a vector of sample names
6 # - You may want to exclude several features or features from your analysis: 51 # - You may want to exclude several features or features from your analysis:
7 # - If so, set the argument 'exclude.features' to a vector of feature names 52 # - If so, set the argument 'exclude.features' to a vector of feature names
8 # - Renaming samples: 53 # - Renaming samples:
9 # - You may want to rename several samples from your analysis: 54 # - You may want to rename several samples from your analysis:
10 # - If so, set the argument 'sample.rename.function' to a function accepting a vector 55 # - If so, set the argument 'sample.rename.function' to a function accepting a vector
11 # of sample names and producing a vector of strings of equivalent length 56 # of sample names and producing a vector of strings of equivalent length
12 # - MVA is confounded by missing values. 57 # - MVA is confounded by missing values.
13 # - By default, this function imputes missing values as zero. 58 # - By default, this function imputes missing values as zero.
14 # - For a different imputation, set the 'data.imputation' argument to a function 59 # - For a different imputation, set the 'data.imputation' argument to a function
15 # accepting a single matrix argument and returning a matrix of the same 60 # accepting a single matrix argument and returning a matrix of the same
17 # - Transformation 62 # - Transformation
18 # - It may be desirable to transform the intensity data to reduce the range. 63 # - It may be desirable to transform the intensity data to reduce the range.
19 # - By default, this function performs an eigth-root transformation: 64 # - By default, this function performs an eigth-root transformation:
20 # - Any root-tranformation has the advantage of never being negative. 65 # - Any root-tranformation has the advantage of never being negative.
21 # - Calculation of the eight-root is four times faster in my hands than log10. 66 # - Calculation of the eight-root is four times faster in my hands than log10.
22 # - However, it has the disadvantage that calculation of fold-differences 67 # - However, it has the disadvantage that calculation of fold-differences
23 # is not additive as with log-transformation. 68 # is not additive as with log-transformation.
24 # - Rather, you must divide the values and raise to the eighth power. 69 # - Rather, you must divide the values and raise to the eighth power.
25 # - For a different transformation, set the 'data.transformation' argument 70 # - For a different transformation, set the 'data.transformation' argument
26 # to a function accepting a single matrix argument. 71 # to a function accepting a single matrix argument.
27 # - The function should be written to return a matrix of the same dimensions 72 # - The function should be written to return a matrix of the same dimensions
105 , data.transformation = function(x) { 150 , data.transformation = function(x) {
106 sqrt( sqrt( sqrt(x) ) ) 151 sqrt( sqrt( sqrt(x) ) )
107 } 152 }
108 , en = new.env() 153 , en = new.env()
109 ) { 154 ) {
155 # log to environment
156 if ( !exists("log", envir = en) ) {
157 en$log <- c()
158 }
159 enlog <- function(s) { en$log <- c(en$log, s); s }
160 #enlog("foo")
161
110 # MatVar - Compute variance of rows or columns of a matrix 162 # MatVar - Compute variance of rows or columns of a matrix
111 # ref: http://stackoverflow.com/a/25100036 163 # ref: http://stackoverflow.com/a/25100036
112 # For row variance, dim == 1, for col variance, dim == 2 164 # For row variance, dim == 1, for col variance, dim == 2
113 MatVar <- function(x, dim = 1) { 165 MatVar <- function(x, dim = 1) {
114 if (dim == 1) { 166 if (dim == 1) {
135 } else stop("Please enter valid dimension, for rows, dim = 1; for colums, dim = 2") 187 } else stop("Please enter valid dimension, for rows, dim = 1; for colums, dim = 2")
136 } 188 }
137 189
138 nonzero.var <- function(x) { 190 nonzero.var <- function(x) {
139 if (nrow(x) == 0) { 191 if (nrow(x) == 0) {
140 print(str(x))
141 stop("matrix has no rows") 192 stop("matrix has no rows")
142 } 193 }
143 if (ncol(x) == 0) { 194 if (ncol(x) == 0) {
144 print(str(x))
145 stop("matrix has no columns") 195 stop("matrix has no columns")
146 } 196 }
147 if ( is.numeric(x) ) { 197 if ( is.numeric(x) ) {
148 # exclude any rows with zero variance 198 # exclude any rows with zero variance
149 row.vars <- MatVar(x, dim = 1) 199 row.vars <- MatVar(x, dim = 1)
151 nonzero.rows <- row.vars[nonzero.row.vars] 201 nonzero.rows <- row.vars[nonzero.row.vars]
152 if ( length(rownames(x)) != length(rownames(nonzero.rows)) ) { 202 if ( length(rownames(x)) != length(rownames(nonzero.rows)) ) {
153 row.names <- attr(nonzero.rows,"names") 203 row.names <- attr(nonzero.rows,"names")
154 x <- x[ row.names, , drop = FALSE ] 204 x <- x[ row.names, , drop = FALSE ]
155 } 205 }
156 206
157 # exclude any columns with zero variance 207 # exclude any columns with zero variance
158 column.vars <- MatVar(x, dim = 2) 208 column.vars <- MatVar(x, dim = 2)
159 nonzero.column.vars <- column.vars > 0 209 nonzero.column.vars <- column.vars > 0
160 nonzero.columns <- column.vars[nonzero.column.vars] 210 nonzero.columns <- column.vars[nonzero.column.vars]
161 if ( length(colnames(x)) != length(colnames(nonzero.columns)) ) { 211 if ( length(colnames(x)) != length(colnames(nonzero.columns)) ) {
168 218
169 if (is.null(x.matrix)) { 219 if (is.null(x.matrix)) {
170 stop("FATAL ERROR - prepare.data.matrix was called with null x.matrix") 220 stop("FATAL ERROR - prepare.data.matrix was called with null x.matrix")
171 } 221 }
172 222
223 enlog("prepare.data.matrix - get matrix")
224
173 en$xpre <- x <- x.matrix 225 en$xpre <- x <- x.matrix
174 226
175 # exclude any samples as indicated 227 # exclude any samples as indicated
176 if ( !is.null(exclude.features) ) { 228 if ( !is.null(exclude.features) ) {
229 enlog("prepare.data.matrix - exclude any samples as indicated")
177 my.colnames <- colnames(x) 230 my.colnames <- colnames(x)
178 my.col.diff <- setdiff(my.colnames, exclude.features) 231 my.col.diff <- setdiff(my.colnames, exclude.features)
179 x <- x[ , my.col.diff , drop = FALSE ] 232 x <- x[ , my.col.diff , drop = FALSE ]
180 } 233 }
181 234
182 # exclude any features as indicated 235 # exclude any features as indicated
183 if ( !is.null(exclude.samples) ) { 236 if ( !is.null(exclude.samples) ) {
237 enlog("prepare.data.matrix - exclude any features as indicated")
184 my.rownames <- rownames(x) 238 my.rownames <- rownames(x)
185 my.row.diff <- setdiff(my.rownames, exclude.samples) 239 my.row.diff <- setdiff(my.rownames, exclude.samples)
186 x <- x[ my.row.diff, , drop = FALSE ] 240 x <- x[ my.row.diff, , drop = FALSE ]
187 } 241 }
188 242
189 # rename rows if desired 243 # rename rows if desired
190 if ( !is.null(sample.rename.function) ) { 244 if ( !is.null(sample.rename.function) ) {
245 enlog("prepare.data.matrix - rename rows if desired")
191 renamed <- sample.rename.function(x) 246 renamed <- sample.rename.function(x)
192 rownames(x) <- renamed 247 rownames(x) <- renamed
193 } 248 }
194 249
250 enlog("prepare.data.matrix - save redacted x.datamatrix to environment")
251
195 # save redacted x.datamatrix to environment 252 # save redacted x.datamatrix to environment
196 en$redacted.data.matrix <- x 253 en$redacted.data.matrix <- x
197 254
198 # impute values missing from the x.datamatrix 255 # impute values missing from the x.datamatrix
199 if ( !is.null(data.imputation) ) { 256 if ( !is.null(data.imputation) ) {
257 enlog("prepare.data.matrix - impute values missing from the x.datamatrix")
200 x <- data.imputation(x) 258 x <- data.imputation(x)
201 } 259 }
202 260
203 # perform transformation if desired 261 # perform transformation if desired
204 if ( !is.null(data.transformation) ) { 262 if ( !is.null(data.transformation) ) {
263 enlog("prepare.data.matrix - perform transformation")
205 x <- data.transformation(x) 264 x <- data.transformation(x)
206 } else { 265 } else {
207 x <- x 266 x <- x
208 } 267 }
209 268
210 # purge rows and columns that have zero variance 269 # purge rows and columns that have zero variance
211 if ( is.numeric(x) ) { 270 if ( is.numeric(x) ) {
271 enlog("prepare.data.matrix - purge rows and columns that have zero variance")
212 x <- nonzero.var(x) 272 x <- nonzero.var(x)
213 } 273 }
214 274
215 # save imputed, transformed x.datamatrix to environment 275 # save imputed, transformed x.datamatrix to environment
216 en$imputed.transformed.data.matrix <- x 276 en$imputed.transformed.data.matrix <- x
217 277
218 return(x) 278 return(x)
219 } 279 }
220 280
221 281 # vim: sw=2 ts=2 et :
222 ##-----------------------------------------------
223 ## helper functions for error detection/reporting
224 ##-----------------------------------------------
225
226 # log-printing to stderr
227 log_print <- function(x, ...) {
228 cat(
229 format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")
230 , " "
231 , c(x, ...)
232 , "\n"
233 , sep=""
234 , file=stderr()
235 )
236 }
237
238 # tryCatchFunc produces a list
239 # On success of expr(), tryCatchFunc produces
240 # list(success TRUE, value = expr(), msg = "")
241 # On failure of expr(), tryCatchFunc produces
242 # list(success = FALSE, value = NA, msg = "the error message")
243 tryCatchFunc <- function(expr) {
244 # format error for logging
245 format_error <- function(e) {
246 paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ")
247 }
248 my_expr <- expr
249 retval <- NULL
250 tryCatch(
251 expr = {
252 retval <- ( list( success = TRUE, value = my_expr(), msg = "" ) )
253 }
254 , error = function(e) {
255 retval <<- list( success = FALSE, value = NA, msg = format_error(e) )
256 }
257 )
258 return (retval)
259 }
260
261 # tryCatchProc produces a list
262 # On success of expr(), tryCatchProc produces
263 # list(success TRUE, msg = "")
264 # On failure of expr(), tryCatchProc produces
265 # list(success = FALSE, msg = "the error message")
266 tryCatchProc <- function(expr) {
267 # format error for logging
268 format_error <- function(e) {
269 paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ")
270 }
271 retval <- NULL
272 tryCatch(
273 expr = {
274 expr()
275 retval <- ( list( success = TRUE, msg = "" ) )
276 }
277 , error = function(e) {
278 retval <<- list( success = FALSE, msg = format_error(e) )
279 }
280 )
281 return (retval)
282 }
283