comparison UrlRequestScheduler.R @ 2:20d69a062da3 draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit d4048accde6bdfd5b3e14f5394902d38991854f8
author prog
date Thu, 02 Mar 2017 08:55:00 -0500
parents e66bb061af06
children fb9c0409d85c
comparison
equal deleted inserted replaced
1:253d531a0193 2:20d69a062da3
1 if ( ! exists('UrlRequestScheduler')) { # Do not load again if already loaded 1 #############
2 # CONSTANTS #
3 #############
2 4
3 ############# 5 BIODB.GET <- 'GET'
4 # CONSTANTS # 6 BIODB.POST <- 'POST'
5 #############
6 7
7 RLIB.GET <- 'GET' 8 #####################
8 RLIB.POST <- 'POST' 9 # CLASS DECLARATION #
10 #####################
9 11
10 ##################### 12 UrlRequestScheduler <- methods::setRefClass("UrlRequestScheduler", fields = list(.n = "numeric", .t = "numeric", .time.of.last.request = "ANY", .useragent = "character", .ssl.verifypeer = "logical", .nb.max.tries = "integer", .verbose = "integer"))
11 # CLASS DECLARATION # 13
12 ##################### 14 # n: number of connections
13 15 # t: time (in seconds)
14 UrlRequestScheduler <- setRefClass("UrlRequestScheduler", fields = list(.n = "numeric", .t = "numeric", .time.of.last.request = "ANY", .useragent = "character", .ssl.verifypeer = "logical", .nb.max.tries = "integer", .verbose = "integer")) 16
15 17 # The scheduler restrict the number of connections at n per t seconds.
16 # n: number of connections 18
17 # t: time (in seconds) 19 ###############
18 20 # CONSTRUCTOR #
19 # The scheduler restrict the number of connections at n per t seconds. 21 ###############
20 22
21 ############### 23 UrlRequestScheduler$methods( initialize = function(n = 1, t = 1, useragent = NA_character_, ssl.verifypeer = TRUE, ...) {
22 # CONSTRUCTOR # 24 .n <<- n
23 ############### 25 .t <<- t
24 26 .time.of.last.request <<- -1
25 UrlRequestScheduler$methods( initialize = function(n = 1, t = 1, useragent = NA_character_, ssl.verifypeer = TRUE, ...) { 27 .useragent <<- useragent
26 .n <<- n 28 .nb.max.tries <<- 10L
27 .t <<- t 29 .ssl.verifypeer <<- ssl.verifypeer
28 .time.of.last.request <<- -1 30 .verbose <<- 0L
29 .useragent <<- useragent 31 callSuper(...) # calls super-class initializer with remaining parameters
30 .nb.max.tries <<- 10L 32 })
31 .ssl.verifypeer <<- ssl.verifypeer 33
32 .verbose <<- 0L 34 ##################
33 callSuper(...) # calls super-class initializer with remaining parameters 35 # SET USER AGENT #
34 }) 36 ##################
35 37
36 ################## 38 UrlRequestScheduler$methods( setUserAgent = function(useragent) {
37 # SET USER AGENT # 39 .useragent <<- useragent
38 ################## 40 })
39 41
40 UrlRequestScheduler$methods( setUserAgent = function(useragent) { 42 ###############
41 .useragent <<- useragent 43 # SET VERBOSE #
42 }) 44 ###############
43 45
44 ############### 46 UrlRequestScheduler$methods( setVerbose = function(verbose) {
45 # SET VERBOSE # 47 .verbose <<- verbose
46 ############### 48 })
47 49
48 UrlRequestScheduler$methods( setVerbose = function(verbose) { 50 ##################
49 .verbose <<- verbose 51 # WAIT AS NEEDED #
50 }) 52 ##################
51 53
52 ################## 54 # Wait the specified between two requests.
53 # WAIT AS NEEDED # 55 UrlRequestScheduler$methods( .wait.as.needed = function() {
54 ################## 56
55 57 # Compute minimum waiting time between two URL requests
56 # Wait the specified between two requests. 58 waiting_time <- .self$.t / .self$.n
57 UrlRequestScheduler$methods( .wait.as.needed = function() { 59
58 60 # Wait, if needed, before previous URL request and this new URL request.
59 # Compute minimum waiting time between two URL requests 61 if (.self$.time.of.last.request > 0) {
60 waiting_time <- .self$.t / .self$.n 62 spent_time <- Sys.time() - .self$.time.of.last.request
61 63 if (spent_time < waiting_time)
62 # Wait, if needed, before previous URL request and this new URL request. 64 Sys.sleep(waiting_time - spent_time)
63 if (.self$.time.of.last.request > 0) { 65 }
64 spent_time <- Sys.time() - .self$.time.of.last.request 66
65 if (spent_time < waiting_time) 67 # Store current time
66 Sys.sleep(waiting_time - spent_time) 68 .time.of.last.request <<- Sys.time()
67 } 69 })
68 70
69 # Store current time 71 #########################
70 .time.of.last.request <<- Sys.time() 72 # GET CURL OPTIONS {{{1 #
71 }) 73 #########################
72 74
73 #################### 75 UrlRequestScheduler$methods( .get.curl.opts = function(opts = list()) {
74 # GET CURL OPTIONS # 76 opts <- RCurl::curlOptions(useragent = .self$.useragent, timeout.ms = 60000, verbose = FALSE, .opts = opts)
75 #################### 77 return(opts)
76 78 })
77 UrlRequestScheduler$methods( .get_curl_opts = function(url) { 79
78 opts <- curlOptions(useragent = .self$.useragent, timeout.ms = 60000, verbose = FALSE) 80 ###################
79 return(opts) 81 # DO GET URL {{{1 #
80 }) 82 ###################
81 83
82 ########### 84 UrlRequestScheduler$methods( .doGetUrl = function(url, params = list(), method = BIODB.GET, opts = .self$.get.curl.opts()) {
83 # GET URL # 85
84 ########### 86 content <- NA_character_
85 87
86 UrlRequestScheduler$methods( .doGetUrl = function(url, params = NULL, method = RLIB.GET) { 88 # Use form to send URL request
87 89 if ( method == BIODB.POST || ( ! is.null(params) && ! is.na(params) && length(params) > 0)) {
88 content <- NA_character_ 90 switch(method,
89 91 GET = { content <- RCurl::getForm(url, .opts = opts, .params = params) },
90 # Use form to send URL request 92 POST = { content <- RCurl::postForm(url, .opts = opts, .params = params) },
91 if ( ! is.null(params) && ! is.na(params)) 93 stop(paste('Unknown method "', method, '".'))
92 switch(method, 94 )
93 GET = { content <- getForm(url, .opts = .self$.get_curl_opts(), .params = params) }, 95 }
94 POST = { content <- postForm(url, .opts = .self$.get_curl_opts(), .params = params) }, 96
95 stop(paste('Unknown method "', method, '".')) 97 # Get URL normally
96 ) 98 else {
97 99 content <- RCurl::getURL(url, .opts = opts, ssl.verifypeer = .self$.ssl.verifypeer)
98 # Get URL normally 100 }
99 else 101 return(content)
100 content <- getURL(url, .opts = .self$.get_curl_opts(), ssl.verifypeer = .self$.ssl.verifypeer) 102 })
101 103
102 return(content) 104 ##########################
103 }) 105 # SEND SOAP REQUEST {{{1 #
104 106 ##########################
105 UrlRequestScheduler$methods( getUrl = function(url, params = NULL, method = RLIB.GET) { 107
106 108 UrlRequestScheduler$methods( sendSoapRequest = function(url, request) {
107 # Load library here and not inside .doGetUrl() since it is called from inside a try/catch clause, hence if library is missing the error will be ignored. 109 header <- c(Accept="text/xml", Accept="multipart/*", 'Content-Type'="text/xml; charset=utf-8")
108 library(bitops) 110 opts <- .self$.get.curl.opts(list(httpheader = header, postfields = request))
109 library(RCurl) 111 results <- .self$getUrl(url, method = BIODB.POST, opts = opts)
110 112 return(results)
111 content <- NA_character_ 113 })
112 114
113 # Wait required time between two requests 115 ################
114 .self$.wait.as.needed() 116 # GET URL {{{1 #
115 117 ################
116 # Run query 118
117 for (i in seq(.self$.nb.max.tries)) { 119 UrlRequestScheduler$methods( getUrl = function(url, params = list(), method = BIODB.GET, opts = .self$.get.curl.opts()) {
118 tryCatch({ content <- .self$.doGetUrl(url, params = params, method = method) }, 120
119 error = function(e) { if (.self$.verbose > 0) print("Retry connection to server...") } ) 121 content <- NA_character_
120 if ( ! is.na(content)) 122
121 break 123 # Wait required time between two requests
122 } 124 .self$.wait.as.needed()
123 125
124 return(content) 126 # Run query
125 }) 127 for (i in seq(.self$.nb.max.tries)) {
126 } 128 tryCatch({ content <- .self$.doGetUrl(url, params = params, method = method, opts = opts) },
129 error = function(e) { if (.self$.verbose > 0) print("Retry connection to server...") } )
130 if ( ! is.na(content))
131 break
132 }
133
134 return(content)
135 })