comparison UrlRequestScheduler.R @ 6:f86fec07f392 draft default tip

planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author prog
date Fri, 22 Feb 2019 16:04:22 -0500
parents fb9c0409d85c
children
comparison
equal deleted inserted replaced
5:fb9c0409d85c 6:f86fec07f392
1 if ( ! exists('UrlRequestScheduler')) { # Do not load again if already loaded
2
3 #############
4 # CONSTANTS #
5 #############
6
7 RLIB.GET <- 'GET'
8 RLIB.POST <- 'POST'
9
10 #####################
11 # CLASS DECLARATION #
12 #####################
13
14 UrlRequestScheduler <- setRefClass("UrlRequestScheduler", fields = list(.n = "numeric", .t = "numeric", .time.of.last.request = "ANY", .useragent = "character", .ssl.verifypeer = "logical", .nb.max.tries = "integer", .verbose = "integer"))
15
16 # n: number of connections
17 # t: time (in seconds)
18
19 # The scheduler restrict the number of connections at n per t seconds.
20
21 ###############
22 # CONSTRUCTOR #
23 ###############
24
25 UrlRequestScheduler$methods( initialize = function(n = 1, t = 1, useragent = NA_character_, ssl.verifypeer = TRUE, ...) {
26 .n <<- n
27 .t <<- t
28 .time.of.last.request <<- -1
29 .useragent <<- useragent
30 .nb.max.tries <<- 10L
31 .ssl.verifypeer <<- ssl.verifypeer
32 .verbose <<- 0L
33 callSuper(...) # calls super-class initializer with remaining parameters
34 })
35
36 ##################
37 # SET USER AGENT #
38 ##################
39
40 UrlRequestScheduler$methods( setUserAgent = function(useragent) {
41 .useragent <<- useragent
42 })
43
44 ###############
45 # SET VERBOSE #
46 ###############
47
48 UrlRequestScheduler$methods( setVerbose = function(verbose) {
49 .verbose <<- verbose
50 })
51
52 ##################
53 # WAIT AS NEEDED #
54 ##################
55
56 # Wait the specified between two requests.
57 UrlRequestScheduler$methods( .wait.as.needed = function() {
58
59 # Compute minimum waiting time between two URL requests
60 waiting_time <- .self$.t / .self$.n
61
62 # Wait, if needed, before previous URL request and this new URL request.
63 if (.self$.time.of.last.request > 0) {
64 spent_time <- Sys.time() - .self$.time.of.last.request
65 if (spent_time < waiting_time)
66 Sys.sleep(waiting_time - spent_time)
67 }
68
69 # Store current time
70 .time.of.last.request <<- Sys.time()
71 })
72
73 ####################
74 # GET CURL OPTIONS #
75 ####################
76
77 UrlRequestScheduler$methods( .get_curl_opts = function(url) {
78 opts <- curlOptions(useragent = .self$.useragent, timeout.ms = 60000, verbose = FALSE)
79 return(opts)
80 })
81
82 ###########
83 # GET URL #
84 ###########
85
86 UrlRequestScheduler$methods( .doGetUrl = function(url, params = NULL, method = RLIB.GET) {
87
88 content <- NA_character_
89
90 # Use form to send URL request
91 if ( ! is.null(params) && ! is.na(params))
92 switch(method,
93 GET = { content <- getForm(url, .opts = .self$.get_curl_opts(), .params = params) },
94 POST = { content <- postForm(url, .opts = .self$.get_curl_opts(), .params = params) },
95 stop(paste('Unknown method "', method, '".'))
96 )
97
98 # Get URL normally
99 else
100 content <- getURL(url, .opts = .self$.get_curl_opts(), ssl.verifypeer = .self$.ssl.verifypeer)
101
102 return(content)
103 })
104
105 UrlRequestScheduler$methods( getUrl = function(url, params = NULL, method = RLIB.GET) {
106
107 # Load library here and not inside .doGetUrl() since it is called from inside a try/catch clause, hence if library is missing the error will be ignored.
108 library(bitops)
109 library(RCurl)
110
111 content <- NA_character_
112
113 # Wait required time between two requests
114 .self$.wait.as.needed()
115
116 # Run query
117 for (i in seq(.self$.nb.max.tries)) {
118 tryCatch({ content <- .self$.doGetUrl(url, params = params, method = method) },
119 error = function(e) { if (.self$.verbose > 0) print("Retry connection to server...") } )
120 if ( ! is.na(content))
121 break
122 }
123
124 return(content)
125 })
126 }