comparison UrlRequestScheduler.R @ 5:fb9c0409d85c draft

planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
author prog
date Wed, 19 Apr 2017 10:00:05 -0400
parents 20d69a062da3
children
comparison
equal deleted inserted replaced
4:b34c14151f25 5:fb9c0409d85c
1 ############# 1 if ( ! exists('UrlRequestScheduler')) { # Do not load again if already loaded
2 # CONSTANTS #
3 #############
4 2
5 BIODB.GET <- 'GET' 3 #############
6 BIODB.POST <- 'POST' 4 # CONSTANTS #
5 #############
7 6
8 ##################### 7 RLIB.GET <- 'GET'
9 # CLASS DECLARATION # 8 RLIB.POST <- 'POST'
10 #####################
11 9
12 UrlRequestScheduler <- methods::setRefClass("UrlRequestScheduler", fields = list(.n = "numeric", .t = "numeric", .time.of.last.request = "ANY", .useragent = "character", .ssl.verifypeer = "logical", .nb.max.tries = "integer", .verbose = "integer")) 10 #####################
13 11 # CLASS DECLARATION #
14 # n: number of connections 12 #####################
15 # t: time (in seconds) 13
16 14 UrlRequestScheduler <- setRefClass("UrlRequestScheduler", fields = list(.n = "numeric", .t = "numeric", .time.of.last.request = "ANY", .useragent = "character", .ssl.verifypeer = "logical", .nb.max.tries = "integer", .verbose = "integer"))
17 # The scheduler restrict the number of connections at n per t seconds. 15
18 16 # n: number of connections
19 ############### 17 # t: time (in seconds)
20 # CONSTRUCTOR # 18
21 ############### 19 # The scheduler restrict the number of connections at n per t seconds.
22 20
23 UrlRequestScheduler$methods( initialize = function(n = 1, t = 1, useragent = NA_character_, ssl.verifypeer = TRUE, ...) { 21 ###############
24 .n <<- n 22 # CONSTRUCTOR #
25 .t <<- t 23 ###############
26 .time.of.last.request <<- -1 24
27 .useragent <<- useragent 25 UrlRequestScheduler$methods( initialize = function(n = 1, t = 1, useragent = NA_character_, ssl.verifypeer = TRUE, ...) {
28 .nb.max.tries <<- 10L 26 .n <<- n
29 .ssl.verifypeer <<- ssl.verifypeer 27 .t <<- t
30 .verbose <<- 0L 28 .time.of.last.request <<- -1
31 callSuper(...) # calls super-class initializer with remaining parameters 29 .useragent <<- useragent
32 }) 30 .nb.max.tries <<- 10L
33 31 .ssl.verifypeer <<- ssl.verifypeer
34 ################## 32 .verbose <<- 0L
35 # SET USER AGENT # 33 callSuper(...) # calls super-class initializer with remaining parameters
36 ################## 34 })
37 35
38 UrlRequestScheduler$methods( setUserAgent = function(useragent) { 36 ##################
39 .useragent <<- useragent 37 # SET USER AGENT #
40 }) 38 ##################
41 39
42 ############### 40 UrlRequestScheduler$methods( setUserAgent = function(useragent) {
43 # SET VERBOSE # 41 .useragent <<- useragent
44 ############### 42 })
45 43
46 UrlRequestScheduler$methods( setVerbose = function(verbose) { 44 ###############
47 .verbose <<- verbose 45 # SET VERBOSE #
48 }) 46 ###############
49 47
50 ################## 48 UrlRequestScheduler$methods( setVerbose = function(verbose) {
51 # WAIT AS NEEDED # 49 .verbose <<- verbose
52 ################## 50 })
53 51
54 # Wait the specified between two requests. 52 ##################
55 UrlRequestScheduler$methods( .wait.as.needed = function() { 53 # WAIT AS NEEDED #
56 54 ##################
57 # Compute minimum waiting time between two URL requests 55
58 waiting_time <- .self$.t / .self$.n 56 # Wait the specified between two requests.
59 57 UrlRequestScheduler$methods( .wait.as.needed = function() {
60 # Wait, if needed, before previous URL request and this new URL request. 58
61 if (.self$.time.of.last.request > 0) { 59 # Compute minimum waiting time between two URL requests
62 spent_time <- Sys.time() - .self$.time.of.last.request 60 waiting_time <- .self$.t / .self$.n
63 if (spent_time < waiting_time) 61
64 Sys.sleep(waiting_time - spent_time) 62 # Wait, if needed, before previous URL request and this new URL request.
65 } 63 if (.self$.time.of.last.request > 0) {
66 64 spent_time <- Sys.time() - .self$.time.of.last.request
67 # Store current time 65 if (spent_time < waiting_time)
68 .time.of.last.request <<- Sys.time() 66 Sys.sleep(waiting_time - spent_time)
69 }) 67 }
70 68
71 ######################### 69 # Store current time
72 # GET CURL OPTIONS {{{1 # 70 .time.of.last.request <<- Sys.time()
73 ######################### 71 })
74 72
75 UrlRequestScheduler$methods( .get.curl.opts = function(opts = list()) { 73 ####################
76 opts <- RCurl::curlOptions(useragent = .self$.useragent, timeout.ms = 60000, verbose = FALSE, .opts = opts) 74 # GET CURL OPTIONS #
77 return(opts) 75 ####################
78 }) 76
79 77 UrlRequestScheduler$methods( .get_curl_opts = function(url) {
80 ################### 78 opts <- curlOptions(useragent = .self$.useragent, timeout.ms = 60000, verbose = FALSE)
81 # DO GET URL {{{1 # 79 return(opts)
82 ################### 80 })
83 81
84 UrlRequestScheduler$methods( .doGetUrl = function(url, params = list(), method = BIODB.GET, opts = .self$.get.curl.opts()) { 82 ###########
85 83 # GET URL #
86 content <- NA_character_ 84 ###########
87 85
88 # Use form to send URL request 86 UrlRequestScheduler$methods( .doGetUrl = function(url, params = NULL, method = RLIB.GET) {
89 if ( method == BIODB.POST || ( ! is.null(params) && ! is.na(params) && length(params) > 0)) { 87
90 switch(method, 88 content <- NA_character_
91 GET = { content <- RCurl::getForm(url, .opts = opts, .params = params) }, 89
92 POST = { content <- RCurl::postForm(url, .opts = opts, .params = params) }, 90 # Use form to send URL request
93 stop(paste('Unknown method "', method, '".')) 91 if ( ! is.null(params) && ! is.na(params))
94 ) 92 switch(method,
95 } 93 GET = { content <- getForm(url, .opts = .self$.get_curl_opts(), .params = params) },
96 94 POST = { content <- postForm(url, .opts = .self$.get_curl_opts(), .params = params) },
97 # Get URL normally 95 stop(paste('Unknown method "', method, '".'))
98 else { 96 )
99 content <- RCurl::getURL(url, .opts = opts, ssl.verifypeer = .self$.ssl.verifypeer) 97
100 } 98 # Get URL normally
101 return(content) 99 else
102 }) 100 content <- getURL(url, .opts = .self$.get_curl_opts(), ssl.verifypeer = .self$.ssl.verifypeer)
103 101
104 ########################## 102 return(content)
105 # SEND SOAP REQUEST {{{1 # 103 })
106 ########################## 104
107 105 UrlRequestScheduler$methods( getUrl = function(url, params = NULL, method = RLIB.GET) {
108 UrlRequestScheduler$methods( sendSoapRequest = function(url, request) { 106
109 header <- c(Accept="text/xml", Accept="multipart/*", 'Content-Type'="text/xml; charset=utf-8") 107 # Load library here and not inside .doGetUrl() since it is called from inside a try/catch clause, hence if library is missing the error will be ignored.
110 opts <- .self$.get.curl.opts(list(httpheader = header, postfields = request)) 108 library(bitops)
111 results <- .self$getUrl(url, method = BIODB.POST, opts = opts) 109 library(RCurl)
112 return(results) 110
113 }) 111 content <- NA_character_
114 112
115 ################ 113 # Wait required time between two requests
116 # GET URL {{{1 # 114 .self$.wait.as.needed()
117 ################ 115
118 116 # Run query
119 UrlRequestScheduler$methods( getUrl = function(url, params = list(), method = BIODB.GET, opts = .self$.get.curl.opts()) { 117 for (i in seq(.self$.nb.max.tries)) {
120 118 tryCatch({ content <- .self$.doGetUrl(url, params = params, method = method) },
121 content <- NA_character_ 119 error = function(e) { if (.self$.verbose > 0) print("Retry connection to server...") } )
122 120 if ( ! is.na(content))
123 # Wait required time between two requests 121 break
124 .self$.wait.as.needed() 122 }
125 123
126 # Run query 124 return(content)
127 for (i in seq(.self$.nb.max.tries)) { 125 })
128 tryCatch({ content <- .self$.doGetUrl(url, params = params, method = method, opts = opts) }, 126 }
129 error = function(e) { if (.self$.verbose > 0) print("Retry connection to server...") } )
130 if ( ! is.na(content))
131 break
132 }
133
134 return(content)
135 })