Mercurial > repos > prog > lcmsmatching
comparison UrlRequestScheduler.R @ 6:f86fec07f392 draft default tip
planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
| author | prog |
|---|---|
| date | Fri, 22 Feb 2019 16:04:22 -0500 |
| parents | fb9c0409d85c |
| children |
comparison
equal
deleted
inserted
replaced
| 5:fb9c0409d85c | 6:f86fec07f392 |
|---|---|
| 1 if ( ! exists('UrlRequestScheduler')) { # Do not load again if already loaded | |
| 2 | |
| 3 ############# | |
| 4 # CONSTANTS # | |
| 5 ############# | |
| 6 | |
| 7 RLIB.GET <- 'GET' | |
| 8 RLIB.POST <- 'POST' | |
| 9 | |
| 10 ##################### | |
| 11 # CLASS DECLARATION # | |
| 12 ##################### | |
| 13 | |
| 14 UrlRequestScheduler <- setRefClass("UrlRequestScheduler", fields = list(.n = "numeric", .t = "numeric", .time.of.last.request = "ANY", .useragent = "character", .ssl.verifypeer = "logical", .nb.max.tries = "integer", .verbose = "integer")) | |
| 15 | |
| 16 # n: number of connections | |
| 17 # t: time (in seconds) | |
| 18 | |
| 19 # The scheduler restrict the number of connections at n per t seconds. | |
| 20 | |
| 21 ############### | |
| 22 # CONSTRUCTOR # | |
| 23 ############### | |
| 24 | |
| 25 UrlRequestScheduler$methods( initialize = function(n = 1, t = 1, useragent = NA_character_, ssl.verifypeer = TRUE, ...) { | |
| 26 .n <<- n | |
| 27 .t <<- t | |
| 28 .time.of.last.request <<- -1 | |
| 29 .useragent <<- useragent | |
| 30 .nb.max.tries <<- 10L | |
| 31 .ssl.verifypeer <<- ssl.verifypeer | |
| 32 .verbose <<- 0L | |
| 33 callSuper(...) # calls super-class initializer with remaining parameters | |
| 34 }) | |
| 35 | |
| 36 ################## | |
| 37 # SET USER AGENT # | |
| 38 ################## | |
| 39 | |
| 40 UrlRequestScheduler$methods( setUserAgent = function(useragent) { | |
| 41 .useragent <<- useragent | |
| 42 }) | |
| 43 | |
| 44 ############### | |
| 45 # SET VERBOSE # | |
| 46 ############### | |
| 47 | |
| 48 UrlRequestScheduler$methods( setVerbose = function(verbose) { | |
| 49 .verbose <<- verbose | |
| 50 }) | |
| 51 | |
| 52 ################## | |
| 53 # WAIT AS NEEDED # | |
| 54 ################## | |
| 55 | |
| 56 # Wait the specified between two requests. | |
| 57 UrlRequestScheduler$methods( .wait.as.needed = function() { | |
| 58 | |
| 59 # Compute minimum waiting time between two URL requests | |
| 60 waiting_time <- .self$.t / .self$.n | |
| 61 | |
| 62 # Wait, if needed, before previous URL request and this new URL request. | |
| 63 if (.self$.time.of.last.request > 0) { | |
| 64 spent_time <- Sys.time() - .self$.time.of.last.request | |
| 65 if (spent_time < waiting_time) | |
| 66 Sys.sleep(waiting_time - spent_time) | |
| 67 } | |
| 68 | |
| 69 # Store current time | |
| 70 .time.of.last.request <<- Sys.time() | |
| 71 }) | |
| 72 | |
| 73 #################### | |
| 74 # GET CURL OPTIONS # | |
| 75 #################### | |
| 76 | |
| 77 UrlRequestScheduler$methods( .get_curl_opts = function(url) { | |
| 78 opts <- curlOptions(useragent = .self$.useragent, timeout.ms = 60000, verbose = FALSE) | |
| 79 return(opts) | |
| 80 }) | |
| 81 | |
| 82 ########### | |
| 83 # GET URL # | |
| 84 ########### | |
| 85 | |
| 86 UrlRequestScheduler$methods( .doGetUrl = function(url, params = NULL, method = RLIB.GET) { | |
| 87 | |
| 88 content <- NA_character_ | |
| 89 | |
| 90 # Use form to send URL request | |
| 91 if ( ! is.null(params) && ! is.na(params)) | |
| 92 switch(method, | |
| 93 GET = { content <- getForm(url, .opts = .self$.get_curl_opts(), .params = params) }, | |
| 94 POST = { content <- postForm(url, .opts = .self$.get_curl_opts(), .params = params) }, | |
| 95 stop(paste('Unknown method "', method, '".')) | |
| 96 ) | |
| 97 | |
| 98 # Get URL normally | |
| 99 else | |
| 100 content <- getURL(url, .opts = .self$.get_curl_opts(), ssl.verifypeer = .self$.ssl.verifypeer) | |
| 101 | |
| 102 return(content) | |
| 103 }) | |
| 104 | |
| 105 UrlRequestScheduler$methods( getUrl = function(url, params = NULL, method = RLIB.GET) { | |
| 106 | |
| 107 # Load library here and not inside .doGetUrl() since it is called from inside a try/catch clause, hence if library is missing the error will be ignored. | |
| 108 library(bitops) | |
| 109 library(RCurl) | |
| 110 | |
| 111 content <- NA_character_ | |
| 112 | |
| 113 # Wait required time between two requests | |
| 114 .self$.wait.as.needed() | |
| 115 | |
| 116 # Run query | |
| 117 for (i in seq(.self$.nb.max.tries)) { | |
| 118 tryCatch({ content <- .self$.doGetUrl(url, params = params, method = method) }, | |
| 119 error = function(e) { if (.self$.verbose > 0) print("Retry connection to server...") } ) | |
| 120 if ( ! is.na(content)) | |
| 121 break | |
| 122 } | |
| 123 | |
| 124 return(content) | |
| 125 }) | |
| 126 } |
