Mercurial > repos > prog > lcmsmatching
diff UrlRequestScheduler.R @ 0:e66bb061af06 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 3529b25417f8e1a5836474c9adec4b696d35099d-dirty
author | prog |
---|---|
date | Tue, 12 Jul 2016 12:02:37 -0400 |
parents | |
children | 20d69a062da3 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/UrlRequestScheduler.R Tue Jul 12 12:02:37 2016 -0400 @@ -0,0 +1,126 @@ +if ( ! exists('UrlRequestScheduler')) { # Do not load again if already loaded + + ############# + # CONSTANTS # + ############# + + RLIB.GET <- 'GET' + RLIB.POST <- 'POST' + + ##################### + # CLASS DECLARATION # + ##################### + + UrlRequestScheduler <- setRefClass("UrlRequestScheduler", fields = list(.n = "numeric", .t = "numeric", .time.of.last.request = "ANY", .useragent = "character", .ssl.verifypeer = "logical", .nb.max.tries = "integer", .verbose = "integer")) + + # n: number of connections + # t: time (in seconds) + + # The scheduler restrict the number of connections at n per t seconds. + + ############### + # CONSTRUCTOR # + ############### + + UrlRequestScheduler$methods( initialize = function(n = 1, t = 1, useragent = NA_character_, ssl.verifypeer = TRUE, ...) { + .n <<- n + .t <<- t + .time.of.last.request <<- -1 + .useragent <<- useragent + .nb.max.tries <<- 10L + .ssl.verifypeer <<- ssl.verifypeer + .verbose <<- 0L + callSuper(...) # calls super-class initializer with remaining parameters + }) + + ################## + # SET USER AGENT # + ################## + + UrlRequestScheduler$methods( setUserAgent = function(useragent) { + .useragent <<- useragent + }) + + ############### + # SET VERBOSE # + ############### + + UrlRequestScheduler$methods( setVerbose = function(verbose) { + .verbose <<- verbose + }) + + ################## + # WAIT AS NEEDED # + ################## + + # Wait the specified between two requests. + UrlRequestScheduler$methods( .wait.as.needed = function() { + + # Compute minimum waiting time between two URL requests + waiting_time <- .self$.t / .self$.n + + # Wait, if needed, before previous URL request and this new URL request. + if (.self$.time.of.last.request > 0) { + spent_time <- Sys.time() - .self$.time.of.last.request + if (spent_time < waiting_time) + Sys.sleep(waiting_time - spent_time) + } + + # Store current time + .time.of.last.request <<- Sys.time() + }) + + #################### + # GET CURL OPTIONS # + #################### + + UrlRequestScheduler$methods( .get_curl_opts = function(url) { + opts <- curlOptions(useragent = .self$.useragent, timeout.ms = 60000, verbose = FALSE) + return(opts) + }) + + ########### + # GET URL # + ########### + + UrlRequestScheduler$methods( .doGetUrl = function(url, params = NULL, method = RLIB.GET) { + + content <- NA_character_ + + # Use form to send URL request + if ( ! is.null(params) && ! is.na(params)) + switch(method, + GET = { content <- getForm(url, .opts = .self$.get_curl_opts(), .params = params) }, + POST = { content <- postForm(url, .opts = .self$.get_curl_opts(), .params = params) }, + stop(paste('Unknown method "', method, '".')) + ) + + # Get URL normally + else + content <- getURL(url, .opts = .self$.get_curl_opts(), ssl.verifypeer = .self$.ssl.verifypeer) + + return(content) + }) + + UrlRequestScheduler$methods( getUrl = function(url, params = NULL, method = RLIB.GET) { + + # Load library here and not inside .doGetUrl() since it is called from inside a try/catch clause, hence if library is missing the error will be ignored. + library(bitops) + library(RCurl) + + content <- NA_character_ + + # Wait required time between two requests + .self$.wait.as.needed() + + # Run query + for (i in seq(.self$.nb.max.tries)) { + tryCatch({ content <- .self$.doGetUrl(url, params = params, method = method) }, + error = function(e) { if (.self$.verbose > 0) print("Retry connection to server...") } ) + if ( ! is.na(content)) + break + } + + return(content) + }) +}