Mercurial > repos > prog > lcmsmatching
comparison UrlRequestScheduler.R @ 5:fb9c0409d85c draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
| author | prog |
|---|---|
| date | Wed, 19 Apr 2017 10:00:05 -0400 |
| parents | 20d69a062da3 |
| children |
comparison
equal
deleted
inserted
replaced
| 4:b34c14151f25 | 5:fb9c0409d85c |
|---|---|
| 1 ############# | 1 if ( ! exists('UrlRequestScheduler')) { # Do not load again if already loaded |
| 2 # CONSTANTS # | |
| 3 ############# | |
| 4 | 2 |
| 5 BIODB.GET <- 'GET' | 3 ############# |
| 6 BIODB.POST <- 'POST' | 4 # CONSTANTS # |
| 5 ############# | |
| 7 | 6 |
| 8 ##################### | 7 RLIB.GET <- 'GET' |
| 9 # CLASS DECLARATION # | 8 RLIB.POST <- 'POST' |
| 10 ##################### | |
| 11 | 9 |
| 12 UrlRequestScheduler <- methods::setRefClass("UrlRequestScheduler", fields = list(.n = "numeric", .t = "numeric", .time.of.last.request = "ANY", .useragent = "character", .ssl.verifypeer = "logical", .nb.max.tries = "integer", .verbose = "integer")) | 10 ##################### |
| 13 | 11 # CLASS DECLARATION # |
| 14 # n: number of connections | 12 ##################### |
| 15 # t: time (in seconds) | 13 |
| 16 | 14 UrlRequestScheduler <- setRefClass("UrlRequestScheduler", fields = list(.n = "numeric", .t = "numeric", .time.of.last.request = "ANY", .useragent = "character", .ssl.verifypeer = "logical", .nb.max.tries = "integer", .verbose = "integer")) |
| 17 # The scheduler restrict the number of connections at n per t seconds. | 15 |
| 18 | 16 # n: number of connections |
| 19 ############### | 17 # t: time (in seconds) |
| 20 # CONSTRUCTOR # | 18 |
| 21 ############### | 19 # The scheduler restrict the number of connections at n per t seconds. |
| 22 | 20 |
| 23 UrlRequestScheduler$methods( initialize = function(n = 1, t = 1, useragent = NA_character_, ssl.verifypeer = TRUE, ...) { | 21 ############### |
| 24 .n <<- n | 22 # CONSTRUCTOR # |
| 25 .t <<- t | 23 ############### |
| 26 .time.of.last.request <<- -1 | 24 |
| 27 .useragent <<- useragent | 25 UrlRequestScheduler$methods( initialize = function(n = 1, t = 1, useragent = NA_character_, ssl.verifypeer = TRUE, ...) { |
| 28 .nb.max.tries <<- 10L | 26 .n <<- n |
| 29 .ssl.verifypeer <<- ssl.verifypeer | 27 .t <<- t |
| 30 .verbose <<- 0L | 28 .time.of.last.request <<- -1 |
| 31 callSuper(...) # calls super-class initializer with remaining parameters | 29 .useragent <<- useragent |
| 32 }) | 30 .nb.max.tries <<- 10L |
| 33 | 31 .ssl.verifypeer <<- ssl.verifypeer |
| 34 ################## | 32 .verbose <<- 0L |
| 35 # SET USER AGENT # | 33 callSuper(...) # calls super-class initializer with remaining parameters |
| 36 ################## | 34 }) |
| 37 | 35 |
| 38 UrlRequestScheduler$methods( setUserAgent = function(useragent) { | 36 ################## |
| 39 .useragent <<- useragent | 37 # SET USER AGENT # |
| 40 }) | 38 ################## |
| 41 | 39 |
| 42 ############### | 40 UrlRequestScheduler$methods( setUserAgent = function(useragent) { |
| 43 # SET VERBOSE # | 41 .useragent <<- useragent |
| 44 ############### | 42 }) |
| 45 | 43 |
| 46 UrlRequestScheduler$methods( setVerbose = function(verbose) { | 44 ############### |
| 47 .verbose <<- verbose | 45 # SET VERBOSE # |
| 48 }) | 46 ############### |
| 49 | 47 |
| 50 ################## | 48 UrlRequestScheduler$methods( setVerbose = function(verbose) { |
| 51 # WAIT AS NEEDED # | 49 .verbose <<- verbose |
| 52 ################## | 50 }) |
| 53 | 51 |
| 54 # Wait the specified between two requests. | 52 ################## |
| 55 UrlRequestScheduler$methods( .wait.as.needed = function() { | 53 # WAIT AS NEEDED # |
| 56 | 54 ################## |
| 57 # Compute minimum waiting time between two URL requests | 55 |
| 58 waiting_time <- .self$.t / .self$.n | 56 # Wait the specified between two requests. |
| 59 | 57 UrlRequestScheduler$methods( .wait.as.needed = function() { |
| 60 # Wait, if needed, before previous URL request and this new URL request. | 58 |
| 61 if (.self$.time.of.last.request > 0) { | 59 # Compute minimum waiting time between two URL requests |
| 62 spent_time <- Sys.time() - .self$.time.of.last.request | 60 waiting_time <- .self$.t / .self$.n |
| 63 if (spent_time < waiting_time) | 61 |
| 64 Sys.sleep(waiting_time - spent_time) | 62 # Wait, if needed, before previous URL request and this new URL request. |
| 65 } | 63 if (.self$.time.of.last.request > 0) { |
| 66 | 64 spent_time <- Sys.time() - .self$.time.of.last.request |
| 67 # Store current time | 65 if (spent_time < waiting_time) |
| 68 .time.of.last.request <<- Sys.time() | 66 Sys.sleep(waiting_time - spent_time) |
| 69 }) | 67 } |
| 70 | 68 |
| 71 ######################### | 69 # Store current time |
| 72 # GET CURL OPTIONS {{{1 # | 70 .time.of.last.request <<- Sys.time() |
| 73 ######################### | 71 }) |
| 74 | 72 |
| 75 UrlRequestScheduler$methods( .get.curl.opts = function(opts = list()) { | 73 #################### |
| 76 opts <- RCurl::curlOptions(useragent = .self$.useragent, timeout.ms = 60000, verbose = FALSE, .opts = opts) | 74 # GET CURL OPTIONS # |
| 77 return(opts) | 75 #################### |
| 78 }) | 76 |
| 79 | 77 UrlRequestScheduler$methods( .get_curl_opts = function(url) { |
| 80 ################### | 78 opts <- curlOptions(useragent = .self$.useragent, timeout.ms = 60000, verbose = FALSE) |
| 81 # DO GET URL {{{1 # | 79 return(opts) |
| 82 ################### | 80 }) |
| 83 | 81 |
| 84 UrlRequestScheduler$methods( .doGetUrl = function(url, params = list(), method = BIODB.GET, opts = .self$.get.curl.opts()) { | 82 ########### |
| 85 | 83 # GET URL # |
| 86 content <- NA_character_ | 84 ########### |
| 87 | 85 |
| 88 # Use form to send URL request | 86 UrlRequestScheduler$methods( .doGetUrl = function(url, params = NULL, method = RLIB.GET) { |
| 89 if ( method == BIODB.POST || ( ! is.null(params) && ! is.na(params) && length(params) > 0)) { | 87 |
| 90 switch(method, | 88 content <- NA_character_ |
| 91 GET = { content <- RCurl::getForm(url, .opts = opts, .params = params) }, | 89 |
| 92 POST = { content <- RCurl::postForm(url, .opts = opts, .params = params) }, | 90 # Use form to send URL request |
| 93 stop(paste('Unknown method "', method, '".')) | 91 if ( ! is.null(params) && ! is.na(params)) |
| 94 ) | 92 switch(method, |
| 95 } | 93 GET = { content <- getForm(url, .opts = .self$.get_curl_opts(), .params = params) }, |
| 96 | 94 POST = { content <- postForm(url, .opts = .self$.get_curl_opts(), .params = params) }, |
| 97 # Get URL normally | 95 stop(paste('Unknown method "', method, '".')) |
| 98 else { | 96 ) |
| 99 content <- RCurl::getURL(url, .opts = opts, ssl.verifypeer = .self$.ssl.verifypeer) | 97 |
| 100 } | 98 # Get URL normally |
| 101 return(content) | 99 else |
| 102 }) | 100 content <- getURL(url, .opts = .self$.get_curl_opts(), ssl.verifypeer = .self$.ssl.verifypeer) |
| 103 | 101 |
| 104 ########################## | 102 return(content) |
| 105 # SEND SOAP REQUEST {{{1 # | 103 }) |
| 106 ########################## | 104 |
| 107 | 105 UrlRequestScheduler$methods( getUrl = function(url, params = NULL, method = RLIB.GET) { |
| 108 UrlRequestScheduler$methods( sendSoapRequest = function(url, request) { | 106 |
| 109 header <- c(Accept="text/xml", Accept="multipart/*", 'Content-Type'="text/xml; charset=utf-8") | 107 # Load library here and not inside .doGetUrl() since it is called from inside a try/catch clause, hence if library is missing the error will be ignored. |
| 110 opts <- .self$.get.curl.opts(list(httpheader = header, postfields = request)) | 108 library(bitops) |
| 111 results <- .self$getUrl(url, method = BIODB.POST, opts = opts) | 109 library(RCurl) |
| 112 return(results) | 110 |
| 113 }) | 111 content <- NA_character_ |
| 114 | 112 |
| 115 ################ | 113 # Wait required time between two requests |
| 116 # GET URL {{{1 # | 114 .self$.wait.as.needed() |
| 117 ################ | 115 |
| 118 | 116 # Run query |
| 119 UrlRequestScheduler$methods( getUrl = function(url, params = list(), method = BIODB.GET, opts = .self$.get.curl.opts()) { | 117 for (i in seq(.self$.nb.max.tries)) { |
| 120 | 118 tryCatch({ content <- .self$.doGetUrl(url, params = params, method = method) }, |
| 121 content <- NA_character_ | 119 error = function(e) { if (.self$.verbose > 0) print("Retry connection to server...") } ) |
| 122 | 120 if ( ! is.na(content)) |
| 123 # Wait required time between two requests | 121 break |
| 124 .self$.wait.as.needed() | 122 } |
| 125 | 123 |
| 126 # Run query | 124 return(content) |
| 127 for (i in seq(.self$.nb.max.tries)) { | 125 }) |
| 128 tryCatch({ content <- .self$.doGetUrl(url, params = params, method = method, opts = opts) }, | 126 } |
| 129 error = function(e) { if (.self$.verbose > 0) print("Retry connection to server...") } ) | |
| 130 if ( ! is.na(content)) | |
| 131 break | |
| 132 } | |
| 133 | |
| 134 return(content) | |
| 135 }) |
