Mercurial > repos > prog > lcmsmatching
comparison UrlRequestScheduler.R @ 5:fb9c0409d85c draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
author | prog |
---|---|
date | Wed, 19 Apr 2017 10:00:05 -0400 |
parents | 20d69a062da3 |
children |
comparison
equal
deleted
inserted
replaced
4:b34c14151f25 | 5:fb9c0409d85c |
---|---|
1 ############# | 1 if ( ! exists('UrlRequestScheduler')) { # Do not load again if already loaded |
2 # CONSTANTS # | |
3 ############# | |
4 | 2 |
5 BIODB.GET <- 'GET' | 3 ############# |
6 BIODB.POST <- 'POST' | 4 # CONSTANTS # |
5 ############# | |
7 | 6 |
8 ##################### | 7 RLIB.GET <- 'GET' |
9 # CLASS DECLARATION # | 8 RLIB.POST <- 'POST' |
10 ##################### | |
11 | 9 |
12 UrlRequestScheduler <- methods::setRefClass("UrlRequestScheduler", fields = list(.n = "numeric", .t = "numeric", .time.of.last.request = "ANY", .useragent = "character", .ssl.verifypeer = "logical", .nb.max.tries = "integer", .verbose = "integer")) | 10 ##################### |
13 | 11 # CLASS DECLARATION # |
14 # n: number of connections | 12 ##################### |
15 # t: time (in seconds) | 13 |
16 | 14 UrlRequestScheduler <- setRefClass("UrlRequestScheduler", fields = list(.n = "numeric", .t = "numeric", .time.of.last.request = "ANY", .useragent = "character", .ssl.verifypeer = "logical", .nb.max.tries = "integer", .verbose = "integer")) |
17 # The scheduler restrict the number of connections at n per t seconds. | 15 |
18 | 16 # n: number of connections |
19 ############### | 17 # t: time (in seconds) |
20 # CONSTRUCTOR # | 18 |
21 ############### | 19 # The scheduler restrict the number of connections at n per t seconds. |
22 | 20 |
23 UrlRequestScheduler$methods( initialize = function(n = 1, t = 1, useragent = NA_character_, ssl.verifypeer = TRUE, ...) { | 21 ############### |
24 .n <<- n | 22 # CONSTRUCTOR # |
25 .t <<- t | 23 ############### |
26 .time.of.last.request <<- -1 | 24 |
27 .useragent <<- useragent | 25 UrlRequestScheduler$methods( initialize = function(n = 1, t = 1, useragent = NA_character_, ssl.verifypeer = TRUE, ...) { |
28 .nb.max.tries <<- 10L | 26 .n <<- n |
29 .ssl.verifypeer <<- ssl.verifypeer | 27 .t <<- t |
30 .verbose <<- 0L | 28 .time.of.last.request <<- -1 |
31 callSuper(...) # calls super-class initializer with remaining parameters | 29 .useragent <<- useragent |
32 }) | 30 .nb.max.tries <<- 10L |
33 | 31 .ssl.verifypeer <<- ssl.verifypeer |
34 ################## | 32 .verbose <<- 0L |
35 # SET USER AGENT # | 33 callSuper(...) # calls super-class initializer with remaining parameters |
36 ################## | 34 }) |
37 | 35 |
38 UrlRequestScheduler$methods( setUserAgent = function(useragent) { | 36 ################## |
39 .useragent <<- useragent | 37 # SET USER AGENT # |
40 }) | 38 ################## |
41 | 39 |
42 ############### | 40 UrlRequestScheduler$methods( setUserAgent = function(useragent) { |
43 # SET VERBOSE # | 41 .useragent <<- useragent |
44 ############### | 42 }) |
45 | 43 |
46 UrlRequestScheduler$methods( setVerbose = function(verbose) { | 44 ############### |
47 .verbose <<- verbose | 45 # SET VERBOSE # |
48 }) | 46 ############### |
49 | 47 |
50 ################## | 48 UrlRequestScheduler$methods( setVerbose = function(verbose) { |
51 # WAIT AS NEEDED # | 49 .verbose <<- verbose |
52 ################## | 50 }) |
53 | 51 |
54 # Wait the specified between two requests. | 52 ################## |
55 UrlRequestScheduler$methods( .wait.as.needed = function() { | 53 # WAIT AS NEEDED # |
56 | 54 ################## |
57 # Compute minimum waiting time between two URL requests | 55 |
58 waiting_time <- .self$.t / .self$.n | 56 # Wait the specified between two requests. |
59 | 57 UrlRequestScheduler$methods( .wait.as.needed = function() { |
60 # Wait, if needed, before previous URL request and this new URL request. | 58 |
61 if (.self$.time.of.last.request > 0) { | 59 # Compute minimum waiting time between two URL requests |
62 spent_time <- Sys.time() - .self$.time.of.last.request | 60 waiting_time <- .self$.t / .self$.n |
63 if (spent_time < waiting_time) | 61 |
64 Sys.sleep(waiting_time - spent_time) | 62 # Wait, if needed, before previous URL request and this new URL request. |
65 } | 63 if (.self$.time.of.last.request > 0) { |
66 | 64 spent_time <- Sys.time() - .self$.time.of.last.request |
67 # Store current time | 65 if (spent_time < waiting_time) |
68 .time.of.last.request <<- Sys.time() | 66 Sys.sleep(waiting_time - spent_time) |
69 }) | 67 } |
70 | 68 |
71 ######################### | 69 # Store current time |
72 # GET CURL OPTIONS {{{1 # | 70 .time.of.last.request <<- Sys.time() |
73 ######################### | 71 }) |
74 | 72 |
75 UrlRequestScheduler$methods( .get.curl.opts = function(opts = list()) { | 73 #################### |
76 opts <- RCurl::curlOptions(useragent = .self$.useragent, timeout.ms = 60000, verbose = FALSE, .opts = opts) | 74 # GET CURL OPTIONS # |
77 return(opts) | 75 #################### |
78 }) | 76 |
79 | 77 UrlRequestScheduler$methods( .get_curl_opts = function(url) { |
80 ################### | 78 opts <- curlOptions(useragent = .self$.useragent, timeout.ms = 60000, verbose = FALSE) |
81 # DO GET URL {{{1 # | 79 return(opts) |
82 ################### | 80 }) |
83 | 81 |
84 UrlRequestScheduler$methods( .doGetUrl = function(url, params = list(), method = BIODB.GET, opts = .self$.get.curl.opts()) { | 82 ########### |
85 | 83 # GET URL # |
86 content <- NA_character_ | 84 ########### |
87 | 85 |
88 # Use form to send URL request | 86 UrlRequestScheduler$methods( .doGetUrl = function(url, params = NULL, method = RLIB.GET) { |
89 if ( method == BIODB.POST || ( ! is.null(params) && ! is.na(params) && length(params) > 0)) { | 87 |
90 switch(method, | 88 content <- NA_character_ |
91 GET = { content <- RCurl::getForm(url, .opts = opts, .params = params) }, | 89 |
92 POST = { content <- RCurl::postForm(url, .opts = opts, .params = params) }, | 90 # Use form to send URL request |
93 stop(paste('Unknown method "', method, '".')) | 91 if ( ! is.null(params) && ! is.na(params)) |
94 ) | 92 switch(method, |
95 } | 93 GET = { content <- getForm(url, .opts = .self$.get_curl_opts(), .params = params) }, |
96 | 94 POST = { content <- postForm(url, .opts = .self$.get_curl_opts(), .params = params) }, |
97 # Get URL normally | 95 stop(paste('Unknown method "', method, '".')) |
98 else { | 96 ) |
99 content <- RCurl::getURL(url, .opts = opts, ssl.verifypeer = .self$.ssl.verifypeer) | 97 |
100 } | 98 # Get URL normally |
101 return(content) | 99 else |
102 }) | 100 content <- getURL(url, .opts = .self$.get_curl_opts(), ssl.verifypeer = .self$.ssl.verifypeer) |
103 | 101 |
104 ########################## | 102 return(content) |
105 # SEND SOAP REQUEST {{{1 # | 103 }) |
106 ########################## | 104 |
107 | 105 UrlRequestScheduler$methods( getUrl = function(url, params = NULL, method = RLIB.GET) { |
108 UrlRequestScheduler$methods( sendSoapRequest = function(url, request) { | 106 |
109 header <- c(Accept="text/xml", Accept="multipart/*", 'Content-Type'="text/xml; charset=utf-8") | 107 # Load library here and not inside .doGetUrl() since it is called from inside a try/catch clause, hence if library is missing the error will be ignored. |
110 opts <- .self$.get.curl.opts(list(httpheader = header, postfields = request)) | 108 library(bitops) |
111 results <- .self$getUrl(url, method = BIODB.POST, opts = opts) | 109 library(RCurl) |
112 return(results) | 110 |
113 }) | 111 content <- NA_character_ |
114 | 112 |
115 ################ | 113 # Wait required time between two requests |
116 # GET URL {{{1 # | 114 .self$.wait.as.needed() |
117 ################ | 115 |
118 | 116 # Run query |
119 UrlRequestScheduler$methods( getUrl = function(url, params = list(), method = BIODB.GET, opts = .self$.get.curl.opts()) { | 117 for (i in seq(.self$.nb.max.tries)) { |
120 | 118 tryCatch({ content <- .self$.doGetUrl(url, params = params, method = method) }, |
121 content <- NA_character_ | 119 error = function(e) { if (.self$.verbose > 0) print("Retry connection to server...") } ) |
122 | 120 if ( ! is.na(content)) |
123 # Wait required time between two requests | 121 break |
124 .self$.wait.as.needed() | 122 } |
125 | 123 |
126 # Run query | 124 return(content) |
127 for (i in seq(.self$.nb.max.tries)) { | 125 }) |
128 tryCatch({ content <- .self$.doGetUrl(url, params = params, method = method, opts = opts) }, | 126 } |
129 error = function(e) { if (.self$.verbose > 0) print("Retry connection to server...") } ) | |
130 if ( ! is.na(content)) | |
131 break | |
132 } | |
133 | |
134 return(content) | |
135 }) |