comparison toolfactory/rgToolFactory2.py @ 119:8ea1133b9d9a draft

Uploaded
author fubar
date Tue, 05 Jan 2021 00:34:48 +0000
parents e43c43396a70
children
comparison
equal deleted inserted replaced
118:e43c43396a70 119:8ea1133b9d9a
11 # July 2020: BCC was fun and I feel like rip van winkle after 5 years. 11 # July 2020: BCC was fun and I feel like rip van winkle after 5 years.
12 # Decided to 12 # Decided to
13 # 1. Fix the toolfactory so it works - done for simplest case 13 # 1. Fix the toolfactory so it works - done for simplest case
14 # 2. Fix planemo so the toolfactory function works 14 # 2. Fix planemo so the toolfactory function works
15 # 3. Rewrite bits using galaxyxml functions where that makes sense - done 15 # 3. Rewrite bits using galaxyxml functions where that makes sense - done
16 #
17 # uses planemo in a biodocker sort of image as a requirement
18 # otherwise planemo seems to leak dependencies back into the
19 # calling venv. Hilarity ensues.
20
21
22 16
23 import argparse 17 import argparse
24 import copy 18 import copy
25 import datetime
26 import grp
27 import json
28 import logging 19 import logging
29 import os 20 import os
30 import re 21 import re
31 import shutil 22 import shutil
32 import subprocess 23 import subprocess
33 import sys 24 import sys
34 import tarfile 25 import tarfile
35 import tempfile 26 import tempfile
36 import time 27 import time
37 28
38
39 from bioblend import ConnectionError 29 from bioblend import ConnectionError
40 from bioblend import toolshed 30 from bioblend import toolshed
41
42 import docker
43 31
44 import galaxyxml.tool as gxt 32 import galaxyxml.tool as gxt
45 import galaxyxml.tool.parameters as gxtp 33 import galaxyxml.tool.parameters as gxtp
46 34
47 import lxml 35 import lxml
52 verbose = True 40 verbose = True
53 debug = True 41 debug = True
54 toolFactoryURL = "https://github.com/fubar2/toolfactory" 42 toolFactoryURL = "https://github.com/fubar2/toolfactory"
55 ourdelim = "~~~" 43 ourdelim = "~~~"
56 44
57 # --input_files="$intab.input_files~~~$intab.input_CL~~~$intab.input_formats\ 45 # --input_files="$intab.input_files~~~$intab.input_CL~~~
58 #~~~$intab.input_label~~~$intab.input_help" 46 # $intab.input_formats# ~~~$intab.input_label
47 # ~~~$intab.input_help"
59 IPATHPOS = 0 48 IPATHPOS = 0
60 ICLPOS = 1 49 ICLPOS = 1
61 IFMTPOS = 2 50 IFMTPOS = 2
62 ILABPOS = 3 51 ILABPOS = 3
63 IHELPOS = 4 52 IHELPOS = 4
64 IOCLPOS = 5 53 IOCLPOS = 5
65 54
66 # --output_files "$otab.history_name~~~$otab.history_format~~~$otab.history_CL~~~$otab.history_test" 55 # --output_files "$otab.history_name~~~$otab.history_format~~~
56 # $otab.history_CL~~~$otab.history_test"
67 ONAMEPOS = 0 57 ONAMEPOS = 0
68 OFMTPOS = 1 58 OFMTPOS = 1
69 OCLPOS = 2 59 OCLPOS = 2
70 OTESTPOS = 3 60 OTESTPOS = 3
71 OOCLPOS = 4 61 OOCLPOS = 4
72 62
73 63
74 # --additional_parameters="$i.param_name~~~$i.param_value~~~ 64 # --additional_parameters="$i.param_name~~~$i.param_value~~~
75 # $i.param_label~~~$i.param_help~~~$i.param_type~~~$i.CL~~~i$.param_CLoverride" 65 # $i.param_label~~~$i.param_help~~~$i.param_type
66 # ~~~$i.CL~~~i$.param_CLoverride"
76 ANAMEPOS = 0 67 ANAMEPOS = 0
77 AVALPOS = 1 68 AVALPOS = 1
78 ALABPOS = 2 69 ALABPOS = 2
79 AHELPPOS = 3 70 AHELPPOS = 3
80 ATYPEPOS = 4 71 ATYPEPOS = 4
104 return s 95 return s
105 except ValueError: 96 except ValueError:
106 return '"%s"' % s 97 return '"%s"' % s
107 98
108 99
109 html_escape_table = {"&": "&amp;", ">": "&gt;", "<": "&lt;", "$": r"\$","#":"&#35;", "$":"&#36;"} 100 html_escape_table = {
110 cheetah_escape_table = {"$": "\$","#":"\#"} 101 "&": "&amp;",
102 ">": "&gt;",
103 "<": "&lt;",
104 "#": "&#35;",
105 "$": "&#36;",
106 }
107 cheetah_escape_table = {"$": "\\$", "#": "\\#"}
108
111 109
112 def html_escape(text): 110 def html_escape(text):
113 """Produce entities within text.""" 111 """Produce entities within text."""
114 return "".join([html_escape_table.get(c, c) for c in text]) 112 return "".join([html_escape_table.get(c, c) for c in text])
113
115 114
116 def cheetah_escape(text): 115 def cheetah_escape(text):
117 """Produce entities within text.""" 116 """Produce entities within text."""
118 return "".join([cheetah_escape_table.get(c, c) for c in text]) 117 return "".join([cheetah_escape_table.get(c, c) for c in text])
119 118
122 """Revert entities within text. Multiple character targets so use replace""" 121 """Revert entities within text. Multiple character targets so use replace"""
123 t = text.replace("&amp;", "&") 122 t = text.replace("&amp;", "&")
124 t = t.replace("&gt;", ">") 123 t = t.replace("&gt;", ">")
125 t = t.replace("&lt;", "<") 124 t = t.replace("&lt;", "<")
126 t = t.replace("\\$", "$") 125 t = t.replace("\\$", "$")
127 t = t.replace("&#36;","$") 126 t = t.replace("&#36;", "$")
128 t = t.replace("&#35;","#") 127 t = t.replace("&#35;", "#")
129 return t 128 return t
130 129
131 130
132 def parse_citations(citations_text): 131 def parse_citations(citations_text):
133 """""" 132 """"""
135 citation_tuples = [] 134 citation_tuples = []
136 for citation in citations: 135 for citation in citations:
137 if citation.startswith("doi"): 136 if citation.startswith("doi"):
138 citation_tuples.append(("doi", citation[len("doi") :].strip())) 137 citation_tuples.append(("doi", citation[len("doi") :].strip()))
139 else: 138 else:
140 citation_tuples.append(("bibtex", citation[len("bibtex") :].strip())) 139 citation_tuples.append(
140 ("bibtex", citation[len("bibtex") :].strip())
141 )
141 return citation_tuples 142 return citation_tuples
142 143
143 144
144 class ScriptRunner: 145 class ScriptRunner:
145 """Wrapper for an arbitrary script 146 """Wrapper for an arbitrary script
166 self.is_positional = self.args.parampass == "positional" 167 self.is_positional = self.args.parampass == "positional"
167 if self.args.sysexe: 168 if self.args.sysexe:
168 self.executeme = self.args.sysexe 169 self.executeme = self.args.sysexe
169 else: 170 else:
170 if self.args.packages: 171 if self.args.packages:
171 self.executeme = self.args.packages.split(",")[0].split(":")[0] 172 self.executeme = (
173 self.args.packages.split(",")[0].split(":")[0].strip()
174 )
172 else: 175 else:
173 self.executeme = None 176 self.executeme = None
174 aCL = self.cl.append 177 aCL = self.cl.append
175 aXCL = self.xmlcl.append 178 aXCL = self.xmlcl.append
176 assert args.parampass in [ 179 assert args.parampass in [
224 aXCL(self.executeme) 227 aXCL(self.executeme)
225 aXCL("$runme") 228 aXCL("$runme")
226 else: 229 else:
227 aCL(self.executeme) 230 aCL(self.executeme)
228 aXCL(self.executeme) 231 aXCL(self.executeme)
229 self.elog = os.path.join(self.repdir, "%s_error_log.txt" % self.tool_name) 232 self.elog = os.path.join(
230 self.tlog = os.path.join(self.repdir, "%s_runner_log.txt" % self.tool_name) 233 self.repdir, "%s_error_log.txt" % self.tool_name
234 )
235 self.tlog = os.path.join(
236 self.repdir, "%s_runner_log.txt" % self.tool_name
237 )
231 238
232 if self.args.parampass == "0": 239 if self.args.parampass == "0":
233 self.clsimple() 240 self.clsimple()
234 else: 241 else:
235 clsuffix = [] 242 clsuffix = []
236 xclsuffix = [] 243 xclsuffix = []
237 for i, p in enumerate(self.infiles): 244 for i, p in enumerate(self.infiles):
238 if p[IOCLPOS] == "STDIN": 245 if p[IOCLPOS].upper() == "STDIN":
239 appendme = [ 246 appendme = [
240 p[IOCLPOS], 247 p[ICLPOS],
241 p[ICLPOS], 248 p[ICLPOS],
242 p[IPATHPOS], 249 p[IPATHPOS],
243 "< %s" % p[IPATHPOS], 250 "< %s" % p[IPATHPOS],
244 ] 251 ]
245 xappendme = [ 252 xappendme = [
246 p[IOCLPOS], 253 p[ICLPOS],
247 p[ICLPOS], 254 p[ICLPOS],
248 p[IPATHPOS], 255 p[IPATHPOS],
249 "< $%s" % p[ICLPOS], 256 "< $%s" % p[ICLPOS],
250 ] 257 ]
251 else: 258 else:
256 for i, p in enumerate(self.outfiles): 263 for i, p in enumerate(self.outfiles):
257 if p[OOCLPOS] == "STDOUT": 264 if p[OOCLPOS] == "STDOUT":
258 self.lastclredirect = [">", p[ONAMEPOS]] 265 self.lastclredirect = [">", p[ONAMEPOS]]
259 self.lastxclredirect = [">", "$%s" % p[OCLPOS]] 266 self.lastxclredirect = [">", "$%s" % p[OCLPOS]]
260 else: 267 else:
261 clsuffix.append([p[ONAMEPOS], p[ONAMEPOS], p[ONAMEPOS], ""]) 268 clsuffix.append([p[OCLPOS], p[ONAMEPOS], p[ONAMEPOS], ""])
262 xclsuffix.append([p[ONAMEPOS], p[ONAMEPOS], "$%s" % p[ONAMEPOS], ""]) 269 xclsuffix.append(
270 [p[OCLPOS], p[ONAMEPOS], "$%s" % p[ONAMEPOS], ""]
271 )
263 for p in self.addpar: 272 for p in self.addpar:
264 clsuffix.append([p[AOCLPOS], p[ACLPOS], p[AVALPOS], p[AOVERPOS]]) 273 clsuffix.append(
274 [p[AOCLPOS], p[ACLPOS], p[AVALPOS], p[AOVERPOS]]
275 )
265 xclsuffix.append( 276 xclsuffix.append(
266 [p[AOCLPOS], p[ACLPOS], '"$%s"' % p[ANAMEPOS], p[AOVERPOS]] 277 [p[AOCLPOS], p[ACLPOS], '"$%s"' % p[ANAMEPOS], p[AOVERPOS]]
267 ) 278 )
268 clsuffix.sort() 279 clsuffix.sort()
269 xclsuffix.sort() 280 xclsuffix.sort()
288 tscript.close() 299 tscript.close()
289 self.escapedScript = [cheetah_escape(x) for x in rx] 300 self.escapedScript = [cheetah_escape(x) for x in rx]
290 self.spacedScript = [f" {x}" for x in rx if x.strip() > ""] 301 self.spacedScript = [f" {x}" for x in rx if x.strip() > ""]
291 art = "%s.%s" % (self.tool_name, self.executeme) 302 art = "%s.%s" % (self.tool_name, self.executeme)
292 artifact = open(art, "wb") 303 artifact = open(art, "wb")
293 artifact.write(bytes('\n'.join(self.escapedScript),'utf8')) 304 artifact.write(bytes("\n".join(self.escapedScript), "utf8"))
294 artifact.close() 305 artifact.close()
295 306
296 def cleanuppar(self): 307 def cleanuppar(self):
297 """ positional parameters are complicated by their numeric ordinal""" 308 """ positional parameters are complicated by their numeric ordinal"""
298 for i, p in enumerate(self.infiles): 309 if self.args.parampass == "positional":
299 infp = copy.copy(p) 310 for i, p in enumerate(self.infiles):
300 if self.args.parampass == "positional": 311 assert (
301 assert infp[ 312 p[ICLPOS].isdigit() or p[ICLPOS].strip().upper() == "STDIN"
302 ICLPOS 313 ), "Positional parameters must be ordinal integers - got %s for %s" % (
303 ].isdigit(), "Positional parameters must be ordinal integers - got %s for %s" % ( 314 p[ICLPOS],
304 infp[ICLPOS], 315 p[ILABPOS],
305 infp[ILABPOS], 316 )
306 ) 317 for i, p in enumerate(self.outfiles):
307 icl = infp[ICLPOS] 318 assert (
308 infp.append(icl) 319 p[OCLPOS].isdigit()
309 if infp[ICLPOS].isdigit() or self.args.parampass == "0": 320 or p[OCLPOS].strip().upper() == "STDOUT"
310 scl = "input%d" % (i + 1) 321 ), "Positional parameters must be ordinal integers - got %s for %s" % (
311 infp[ICLPOS] = scl
312 self.infiles[i] = infp
313 for i, p in enumerate(
314 self.outfiles
315 ):
316 if self.args.parampass == "positional" and p[OCLPOS].upper() != "STDOUT":
317 assert p[
318 OCLPOS
319 ].isdigit(), "Positional parameters must be ordinal integers - got %s for %s" % (
320 p[OCLPOS], 322 p[OCLPOS],
321 p[ONAMEPOS], 323 p[ONAMEPOS],
322 ) 324 )
323 p.append(p[OCLPOS]) # keep copy 325 for i, p in enumerate(self.addpar):
324 if p[OOCLPOS].isdigit() or p[OOCLPOS].upper() == "STDOUT":
325 scl = p[ONAMEPOS]
326 p[OCLPOS] = scl
327 self.outfiles[i] = p
328 for i, p in enumerate(self.addpar):
329 if self.args.parampass == "positional":
330 assert p[ 326 assert p[
331 ACLPOS 327 ACLPOS
332 ].isdigit(), "Positional parameters must be ordinal integers - got %s for %s" % ( 328 ].isdigit(), "Positional parameters must be ordinal integers - got %s for %s" % (
333 p[ACLPOS], 329 p[ACLPOS],
334 p[ANAMEPOS], 330 p[ANAMEPOS],
335 ) 331 )
332 for i, p in enumerate(self.infiles):
333 infp = copy.copy(p)
334 icl = infp[ICLPOS]
335 infp.append(icl)
336 if (
337 infp[ICLPOS].isdigit()
338 or self.args.parampass == "0"
339 or infp[ICLPOS].strip().upper() == "STDOUT"
340 ):
341 scl = "input%d" % (i + 1)
342 infp[ICLPOS] = scl
343 self.infiles[i] = infp
344 for i, p in enumerate(self.outfiles):
345 p.append(p[OCLPOS]) # keep copy
346 if (
347 p[OOCLPOS].isdigit() and self.args.parampass != "positional"
348 ) or p[OOCLPOS].strip().upper() == "STDOUT":
349 scl = p[ONAMEPOS]
350 p[OCLPOS] = scl
351 self.outfiles[i] = p
352 for i, p in enumerate(self.addpar):
336 p.append(p[ACLPOS]) 353 p.append(p[ACLPOS])
337 if p[ACLPOS].isdigit(): 354 if p[ACLPOS].isdigit():
338 scl = "input%s" % p[ACLPOS] 355 scl = "param%s" % p[ACLPOS]
339 p[ACLPOS] = scl 356 p[ACLPOS] = scl
340 self.addpar[i] = p 357 self.addpar[i] = p
341 358
342 def clsimple(self): 359 def clsimple(self):
343 """no parameters - uses < and > for i/o""" 360 """no parameters - uses < and > for i/o"""
368 aXCL(v) 385 aXCL(v)
369 if self.lastxclredirect: 386 if self.lastxclredirect:
370 aXCL(self.lastxclredirect[0]) 387 aXCL(self.lastxclredirect[0])
371 aXCL(self.lastxclredirect[1]) 388 aXCL(self.lastxclredirect[1])
372 389
373
374 def clargparse(self): 390 def clargparse(self):
375 """argparse style""" 391 """argparse style"""
376 aCL = self.cl.append 392 aCL = self.cl.append
377 aXCL = self.xmlcl.append 393 aXCL = self.xmlcl.append
378 # inputs then params in argparse named form 394 # inputs then params in argparse named form
394 else: 410 else:
395 k = "--%s" % k 411 k = "--%s" % k
396 aCL(k) 412 aCL(k)
397 aCL(v) 413 aCL(v)
398 414
399
400 def getNdash(self, newname): 415 def getNdash(self, newname):
401 if self.is_positional: 416 if self.is_positional:
402 ndash = 0 417 ndash = 0
403 else: 418 else:
404 ndash = 2 419 ndash = 2
406 ndash = 1 421 ndash = 1
407 return ndash 422 return ndash
408 423
409 def doXMLparam(self): 424 def doXMLparam(self):
410 """flake8 made me do this...""" 425 """flake8 made me do this..."""
411 for p in self.outfiles: # --output_files "$otab.history_name~~~$otab.history_format~~~$otab.history_CL~~~$otab.history_test" 426 for (
427 p
428 ) in (
429 self.outfiles
430 ): # --output_files "$otab.history_name~~~$otab.history_format~~~$otab.history_CL~~~$otab.history_test"
412 newname, newfmt, newcl, test, oldcl = p 431 newname, newfmt, newcl, test, oldcl = p
413 test = test.strip() 432 test = test.strip()
414 ndash = self.getNdash(newcl) 433 ndash = self.getNdash(newcl)
415 aparm = gxtp.OutputData(name=newname, format=newfmt, num_dashes=ndash, label=newcl) 434 aparm = gxtp.OutputData(
435 name=newname, format=newfmt, num_dashes=ndash, label=newcl
436 )
416 aparm.positional = self.is_positional 437 aparm.positional = self.is_positional
417 if self.is_positional: 438 if self.is_positional:
418 if oldcl.upper() == "STDOUT": 439 if oldcl.upper() == "STDOUT":
419 aparm.positional = 9999999 440 aparm.positional = 9999999
420 aparm.command_line_override = "> $%s" % newname 441 aparm.command_line_override = "> $%s" % newname
428 c = "diff" 449 c = "diff"
429 ld = 0 450 ld = 0
430 if test.split(":")[1].isdigit: 451 if test.split(":")[1].isdigit:
431 ld = int(test.split(":")[1]) 452 ld = int(test.split(":")[1])
432 tp = gxtp.TestOutput( 453 tp = gxtp.TestOutput(
433 name=newcl, 454 name=newname,
434 value="%s_sample" % newcl, 455 value="%s_sample" % newname,
435 format=newfmt, 456 format=newfmt,
436 compare= c, 457 compare=c,
437 lines_diff=ld, 458 lines_diff=ld,
438 ) 459 )
439 elif test.startswith("sim_size"): 460 elif test.startswith("sim_size"):
440 c = "sim_size" 461 c = "sim_size"
441 tn = test.split(":")[1].strip() 462 tn = test.split(":")[1].strip()
442 if tn > '': 463 if tn > "":
443 if '.' in tn: 464 if "." in tn:
444 delta = None 465 delta = None
445 delta_frac = min(1.0,float(tn)) 466 delta_frac = min(1.0, float(tn))
446 else: 467 else:
447 delta = int(tn) 468 delta = int(tn)
448 delta_frac = None 469 delta_frac = None
449 tp = gxtp.TestOutput( 470 tp = gxtp.TestOutput(
450 name=newcl, 471 name=newname,
451 value="%s_sample" % newcl, 472 value="%s_sample" % newname,
452 format=newfmt, 473 format=newfmt,
453 compare= c, 474 compare=c,
454 delta = delta, 475 delta=delta,
455 delta_frac = delta_frac 476 delta_frac=delta_frac,
456 ) 477 )
457 self.testparam.append(tp) 478 self.testparam.append(tp)
458 for p in self.infiles: 479 for p in self.infiles:
459 newname = p[ICLPOS] 480 newname = p[ICLPOS]
460 newfmt = p[IFMTPOS] 481 newfmt = p[IFMTPOS]
461 ndash = self.getNdash(newname) 482 ndash = self.getNdash(newname)
475 aninput.positional = self.is_positional 496 aninput.positional = self.is_positional
476 self.tinputs.append(aninput) 497 self.tinputs.append(aninput)
477 tparm = gxtp.TestParam(name=newname, value="%s_sample" % newname) 498 tparm = gxtp.TestParam(name=newname, value="%s_sample" % newname)
478 self.testparam.append(tparm) 499 self.testparam.append(tparm)
479 for p in self.addpar: 500 for p in self.addpar:
480 newname, newval, newlabel, newhelp, newtype, newcl, override, oldcl = p 501 (
502 newname,
503 newval,
504 newlabel,
505 newhelp,
506 newtype,
507 newcl,
508 override,
509 oldcl,
510 ) = p
481 if not len(newlabel) > 0: 511 if not len(newlabel) > 0:
482 newlabel = newname 512 newlabel = newname
483 ndash = self.getNdash(newname) 513 ndash = self.getNdash(newname)
484 if newtype == "text": 514 if newtype == "text":
485 aparm = gxtp.TextParam( 515 aparm = gxtp.TextParam(
561 Create a Galaxy xml tool wrapper for the new script 591 Create a Galaxy xml tool wrapper for the new script
562 Uses galaxyhtml 592 Uses galaxyhtml
563 Hmmm. How to get the command line into correct order... 593 Hmmm. How to get the command line into correct order...
564 """ 594 """
565 if self.command_override: 595 if self.command_override:
566 self.newtool.command_override = self.command_override # config file 596 self.newtool.command_override = (
597 self.command_override
598 ) # config file
567 else: 599 else:
568 self.newtool.command_override = self.xmlcl 600 self.newtool.command_override = self.xmlcl
569 if self.args.help_text: 601 if self.args.help_text:
570 helptext = open(self.args.help_text, "r").readlines() 602 helptext = open(self.args.help_text, "r").readlines()
571 safertext = "\n".join([cheetah_escape(x) for x in helptext]) 603 safertext = "\n".join([cheetah_escape(x) for x in helptext])
572 if self.args.script_path: 604 if self.args.script_path:
573 scr = [x for x in self.spacedScript if x.strip() > ""] 605 scr = [x for x in self.spacedScript if x.strip() > ""]
574 scr.insert(0,'\n------\n\n\nScript::\n') 606 scr.insert(0, "\n------\n\n\nScript::\n")
575 if len(scr) > 300: 607 if len(scr) > 300:
576 scr = ( 608 scr = (
577 scr[:100] 609 scr[:100]
578 + [" >300 lines - stuff deleted", " ......"] 610 + [" >300 lines - stuff deleted", " ......"]
579 + scr[-100:] 611 + scr[-100:]
580 ) 612 )
581 scr.append('\n') 613 scr.append("\n")
582 safertext = safertext + "\n".join(scr) 614 safertext = safertext + "\n".join(scr)
583 self.newtool.help = safertext 615 self.newtool.help = safertext
584 else: 616 else:
585 self.newtool.help = ( 617 self.newtool.help = (
586 "Please ask the tool author (%s) for help \ 618 "Please ask the tool author (%s) for help \
589 ) 621 )
590 self.newtool.version_command = None # do not want 622 self.newtool.version_command = None # do not want
591 requirements = gxtp.Requirements() 623 requirements = gxtp.Requirements()
592 if self.args.packages: 624 if self.args.packages:
593 for d in self.args.packages.split(","): 625 for d in self.args.packages.split(","):
594 ver = '' 626 ver = ""
595 d = d.replace('==',':') 627 d = d.replace("==", ":")
596 d = d.replace('=',':') 628 d = d.replace("=", ":")
597 if ":" in d: 629 if ":" in d:
598 packg, ver = d.split(":") 630 packg, ver = d.split(":")
599 else: 631 else:
600 packg = d 632 packg = d
601 requirements.append( 633 requirements.append(
608 self.doXMLparam() 640 self.doXMLparam()
609 self.newtool.outputs = self.toutputs 641 self.newtool.outputs = self.toutputs
610 self.newtool.inputs = self.tinputs 642 self.newtool.inputs = self.tinputs
611 if self.args.script_path: 643 if self.args.script_path:
612 configfiles = gxtp.Configfiles() 644 configfiles = gxtp.Configfiles()
613 configfiles.append(gxtp.Configfile(name="runme", text="\n".join(self.escapedScript))) 645 configfiles.append(
646 gxtp.Configfile(
647 name="runme", text="\n".join(self.escapedScript)
648 )
649 )
614 self.newtool.configfiles = configfiles 650 self.newtool.configfiles = configfiles
615 tests = gxtp.Tests() 651 tests = gxtp.Tests()
616 test_a = gxtp.Test() 652 test_a = gxtp.Test()
617 for tp in self.testparam: 653 for tp in self.testparam:
618 test_a.append(tp) 654 test_a.append(tp)
625 self.newtool.add_comment("Source in git at: %s" % (toolFactoryURL)) 661 self.newtool.add_comment("Source in git at: %s" % (toolFactoryURL))
626 self.newtool.add_comment( 662 self.newtool.add_comment(
627 "Cite: Creating re-usable tools from scripts doi:10.1093/bioinformatics/bts573" 663 "Cite: Creating re-usable tools from scripts doi:10.1093/bioinformatics/bts573"
628 ) 664 )
629 exml0 = self.newtool.export() 665 exml0 = self.newtool.export()
630 exml = exml0.replace(FAKEEXE, "") # temporary work around until PR accepted 666 exml = exml0.replace(
667 FAKEEXE, ""
668 ) # temporary work around until PR accepted
631 if ( 669 if (
632 self.test_override 670 self.test_override
633 ): # cannot do this inside galaxyxml as it expects lxml objects for tests 671 ): # cannot do this inside galaxyxml as it expects lxml objects for tests
634 part1 = exml.split("<tests>")[0] 672 part1 = exml.split("<tests>")[0]
635 part2 = exml.split("</tests>")[1] 673 part2 = exml.split("</tests>")[1]
636 fixed = "%s\n%s\n%s" % (part1, self.test_override, part2) 674 fixed = "%s\n%s\n%s" % (part1, self.test_override, part2)
637 exml = fixed 675 exml = fixed
638 #exml = exml.replace('range="1:"', 'range="1000:"') 676 # exml = exml.replace('range="1:"', 'range="1000:"')
639 xf = open("%s.xml" % self.tool_name, "w") 677 xf = open("%s.xml" % self.tool_name, "w")
640 xf.write(exml) 678 xf.write(exml)
641 xf.write("\n") 679 xf.write("\n")
642 xf.close() 680 xf.close()
643 # ready for the tarball 681 # ready for the tarball
655 if os.path.exists(self.elog): 693 if os.path.exists(self.elog):
656 ste = open(self.elog, "a") 694 ste = open(self.elog, "a")
657 else: 695 else:
658 ste = open(self.elog, "w") 696 ste = open(self.elog, "w")
659 if self.lastclredirect: 697 if self.lastclredirect:
660 sto = open(self.lastclredirect[1], "wb") # is name of an output file 698 sto = open(
699 self.lastclredirect[1], "wb"
700 ) # is name of an output file
661 else: 701 else:
662 if os.path.exists(self.tlog): 702 if os.path.exists(self.tlog):
663 sto = open(self.tlog, "a") 703 sto = open(self.tlog, "a")
664 else: 704 else:
665 sto = open(self.tlog, "w") 705 sto = open(self.tlog, "w")
666 sto.write( 706 sto.write(
667 "## Executing Toolfactory generated command line = %s\n" % scl 707 "## Executing Toolfactory generated command line = %s\n"
708 % scl
668 ) 709 )
669 sto.flush() 710 sto.flush()
670 subp = subprocess.run( 711 subp = subprocess.run(
671 self.cl, env=self.ourenv, shell=False, stdout=sto, stderr=ste 712 self.cl, env=self.ourenv, shell=False, stdout=sto, stderr=ste
672 ) 713 )
683 else: 724 else:
684 sto = sys.stdout 725 sto = sys.stdout
685 subp = subprocess.run( 726 subp = subprocess.run(
686 self.cl, env=self.ourenv, shell=False, stdout=sto, stdin=sti 727 self.cl, env=self.ourenv, shell=False, stdout=sto, stdin=sti
687 ) 728 )
688 sto.write("## Executing Toolfactory generated command line = %s\n" % scl) 729 sto.write(
730 "## Executing Toolfactory generated command line = %s\n" % scl
731 )
689 retval = subp.returncode 732 retval = subp.returncode
690 sto.close() 733 sto.close()
691 sti.close() 734 sti.close()
692 if os.path.isfile(self.tlog) and os.stat(self.tlog).st_size == 0: 735 if os.path.isfile(self.tlog) and os.stat(self.tlog).st_size == 0:
693 os.unlink(self.tlog) 736 os.unlink(self.tlog)
696 if retval != 0 and err: # problem 739 if retval != 0 and err: # problem
697 sys.stderr.write(err) 740 sys.stderr.write(err)
698 logging.debug("run done") 741 logging.debug("run done")
699 return retval 742 return retval
700 743
701 def copy_to_container(self, src, dest, container):
702 """Recreate the src directory tree at dest - full path included"""
703 idir = os.getcwd()
704 workdir = os.path.dirname(src)
705 os.chdir(workdir)
706 _, tfname = tempfile.mkstemp(suffix=".tar")
707 tar = tarfile.open(tfname, mode="w")
708 srcb = os.path.basename(src)
709 tar.add(srcb)
710 tar.close()
711 data = open(tfname, "rb").read()
712 container.put_archive(dest, data)
713 os.unlink(tfname)
714 os.chdir(idir)
715
716 def copy_from_container(self, src, dest, container):
717 """recreate the src directory tree at dest using docker sdk"""
718 os.makedirs(dest, exist_ok=True)
719 _, tfname = tempfile.mkstemp(suffix=".tar")
720 tf = open(tfname, "wb")
721 bits, stat = container.get_archive(src)
722 for chunk in bits:
723 tf.write(chunk)
724 tf.close()
725 tar = tarfile.open(tfname, "r")
726 tar.extractall(dest)
727 tar.close()
728 os.unlink(tfname)
729
730 def planemo_biodocker_test(self):
731 """planemo currently leaks dependencies if used in the same container and gets unhappy after a
732 first successful run. https://github.com/galaxyproject/planemo/issues/1078#issuecomment-731476930
733
734 Docker biocontainer has planemo with caches filled to save repeated downloads
735
736
737 """
738
739 def prun(container, tout, cl, user="biodocker"):
740 rlog = container.exec_run(cl, user=user)
741 slogl = str(rlog).split("\\n")
742 slog = "\n".join(slogl)
743 tout.write(f"## got rlog {slog} from {cl}\n")
744
745 if os.path.exists(self.tlog):
746 tout = open(self.tlog, "a")
747 else:
748 tout = open(self.tlog, "w")
749 planemoimage = "quay.io/fubar2/planemo-biocontainer"
750 xreal = "%s.xml" % self.tool_name
751 repname = f"{self.tool_name}_planemo_test_report.html"
752 ptestrep_path = os.path.join(self.repdir, repname)
753 tool_name = self.tool_name
754 client = docker.from_env()
755 tvol = client.volumes.create()
756 tvolname = tvol.name
757 destdir = "/toolfactory/ptest"
758 imrep = os.path.join(destdir, repname)
759 # need to keep the container running so keep it open with sleep
760 # will stop and destroy it when we are done
761 container = client.containers.run(
762 planemoimage,
763 "sleep 120m",
764 detach=True,
765 user="biodocker",
766 volumes={f"{tvolname}": {"bind": "/toolfactory", "mode": "rw"}},
767 )
768 cl = f"mkdir -p {destdir}"
769 prun(container, tout, cl, user="root")
770 # that's how hard it is to get root on a biodocker container :(
771 cl = f"rm -rf {destdir}/*"
772 prun(container, tout, cl, user="root")
773 ptestpath = os.path.join(destdir, "tfout", xreal)
774 self.copy_to_container(self.tooloutdir, destdir, container)
775 cl = "chown -R biodocker /toolfactory"
776 prun(container, tout, cl, user="root")
777 rlog = container.exec_run(f"ls -la {destdir}")
778 ptestcl = f"planemo test --update_test_data --no_cleanup --test_data {destdir}/tfout/test-data --galaxy_root /home/biodocker/galaxy-central {ptestpath}"
779 try:
780 rlog = container.exec_run(ptestcl)
781 # fails because test outputs missing but updates the test-data directory
782 except:
783 e = sys.exc_info()[0]
784 tout.write(f"#### error: {e} from {ptestcl}\n")
785 cl = f"planemo test --test_output {imrep} --no_cleanup --test_data {destdir}/tfout/test-data --galaxy_root /home/biodocker/galaxy-central {ptestpath}"
786 try:
787 prun(container, tout, cl)
788 except:
789 e = sys.exc_info()[0]
790 tout.write(f"#### error: {e} from {ptestcl}\n")
791 testouts = tempfile.mkdtemp(suffix=None, prefix="tftemp", dir=".")
792 self.copy_from_container(destdir, testouts, container)
793 src = os.path.join(testouts, "ptest")
794 if os.path.isdir(src):
795 shutil.copytree(src, ".", dirs_exist_ok=True)
796 src = repname
797 if os.path.isfile(repname):
798 shutil.copyfile(src, ptestrep_path)
799 else:
800 tout.write(f"No output from run to shutil.copytree in {src}\n")
801 tout.close()
802 container.stop()
803 container.remove()
804 tvol.remove()
805 shutil.rmtree(testouts) # leave for debugging
806
807 def shedLoad(self): 744 def shedLoad(self):
808 """ 745 """
809 use bioblend to create new repository 746 use bioblend to create new repository
810 or update existing 747 or update existing
811 748
814 sto = open(self.tlog, "a") 751 sto = open(self.tlog, "a")
815 else: 752 else:
816 sto = open(self.tlog, "w") 753 sto = open(self.tlog, "w")
817 754
818 ts = toolshed.ToolShedInstance( 755 ts = toolshed.ToolShedInstance(
819 url=self.args.toolshed_url, key=self.args.toolshed_api_key, verify=False 756 url=self.args.toolshed_url,
757 key=self.args.toolshed_api_key,
758 verify=False,
820 ) 759 )
821 repos = ts.repositories.get_repositories() 760 repos = ts.repositories.get_repositories()
822 rnames = [x.get("name", "?") for x in repos] 761 rnames = [x.get("name", "?") for x in repos]
823 rids = [x.get("id", "?") for x in repos] 762 rids = [x.get("id", "?") for x in repos]
824 tfcat = "ToolFactory generated tools" 763 tfcat = "ToolFactory generated tools"
838 remote_repository_url=self.args.toolshed_url, 777 remote_repository_url=self.args.toolshed_url,
839 homepage_url=None, 778 homepage_url=None,
840 category_ids=catID, 779 category_ids=catID,
841 ) 780 )
842 tid = res.get("id", None) 781 tid = res.get("id", None)
843 sto.write(f"#create_repository {self.args.tool_name} tid={tid} res={res}\n") 782 sto.write(
783 f"#create_repository {self.args.tool_name} tid={tid} res={res}\n"
784 )
844 else: 785 else:
845 i = rnames.index(self.tool_name) 786 i = rnames.index(self.tool_name)
846 tid = rids[i] 787 tid = rids[i]
847 try: 788 try:
848 res = ts.repositories.update_repository( 789 res = ts.repositories.update_repository(
880 "--section_label", 821 "--section_label",
881 "ToolFactory", 822 "ToolFactory",
882 ] 823 ]
883 tout.write("running\n%s\n" % " ".join(cll)) 824 tout.write("running\n%s\n" % " ".join(cll))
884 subp = subprocess.run( 825 subp = subprocess.run(
885 cll, env=self.ourenv, cwd=self.ourcwd, shell=False, stderr=tout, stdout=tout 826 cll,
827 env=self.ourenv,
828 cwd=self.ourcwd,
829 shell=False,
830 stderr=tout,
831 stdout=tout,
886 ) 832 )
887 tout.write( 833 tout.write(
888 "installed %s - got retcode %d\n" % (self.tool_name, subp.returncode) 834 "installed %s - got retcode %d\n"
835 % (self.tool_name, subp.returncode)
889 ) 836 )
890 tout.close() 837 tout.close()
891 return subp.returncode 838 return subp.returncode
892
893
894 839
895 def writeShedyml(self): 840 def writeShedyml(self):
896 """for planemo""" 841 """for planemo"""
897 yuser = self.args.user_email.split("@")[0] 842 yuser = self.args.user_email.split("@")[0]
898 yfname = os.path.join(self.tooloutdir, ".shed.yml") 843 yfname = os.path.join(self.tooloutdir, ".shed.yml")
948 tout.write( 893 tout.write(
949 "###Output file %s not found in testdir %s. This is normal during the first Planemo run that generates test outputs" 894 "###Output file %s not found in testdir %s. This is normal during the first Planemo run that generates test outputs"
950 % (tdest, self.testdir) 895 % (tdest, self.testdir)
951 ) 896 )
952 tf = tarfile.open(self.newtarpath, "w:gz") 897 tf = tarfile.open(self.newtarpath, "w:gz")
953 tf.add(name=self.tooloutdir, arcname=self.tool_name, filter=exclude_function) 898 tf.add(
899 name=self.tooloutdir,
900 arcname=self.tool_name,
901 filter=exclude_function,
902 )
954 tf.close() 903 tf.close()
955 shutil.copyfile(self.newtarpath, self.args.new_tool) 904 shutil.copyfile(self.newtarpath, self.args.new_tool)
956 905
957 def moveRunOutputs(self): 906 def moveRunOutputs(self):
958 """need to move planemo or run outputs into toolfactory collection""" 907 """need to move planemo or run outputs into toolfactory collection"""
988 shutil.copyfile(src, dest) 937 shutil.copyfile(src, dest)
989 938
990 939
991 def main(): 940 def main():
992 """ 941 """
993 This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml 942 This is a Galaxy wrapper.
943 It expects to be called by a special purpose tool.xml
994 944
995 """ 945 """
996 parser = argparse.ArgumentParser() 946 parser = argparse.ArgumentParser()
997 a = parser.add_argument 947 a = parser.add_argument
998 a("--script_path", default=None) 948 a("--script_path", default=None)
1018 a("--parampass", default="positional") 968 a("--parampass", default="positional")
1019 a("--tfout", default="./tfout") 969 a("--tfout", default="./tfout")
1020 a("--new_tool", default="new_tool") 970 a("--new_tool", default="new_tool")
1021 a("--galaxy_url", default="http://localhost:8080") 971 a("--galaxy_url", default="http://localhost:8080")
1022 a("--toolshed_url", default="http://localhost:9009") 972 a("--toolshed_url", default="http://localhost:9009")
1023 # make sure this is identical to tool_sheds_conf.xml localhost != 127.0.0.1 so validation fails 973 # make sure this is identical to tool_sheds_conf.xml
974 # localhost != 127.0.0.1 so validation fails
1024 a("--toolshed_api_key", default="fakekey") 975 a("--toolshed_api_key", default="fakekey")
1025 a("--galaxy_api_key", default="fakekey") 976 a("--galaxy_api_key", default="fakekey")
1026 a("--galaxy_root", default="/galaxy-central") 977 a("--galaxy_root", default="/galaxy-central")
1027 a("--galaxy_venv", default="/galaxy_venv") 978 a("--galaxy_venv", default="/galaxy_venv")
1028 args = parser.parse_args() 979 args = parser.parse_args()
1029 assert not args.bad_user, ( 980 assert not args.bad_user, (
1030 'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to "admin_users" in the galaxy.yml Galaxy configuration file' 981 'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy \
982 admin adds %s to "admin_users" in the galaxy.yml Galaxy configuration file'
1031 % (args.bad_user, args.bad_user) 983 % (args.bad_user, args.bad_user)
1032 ) 984 )
1033 assert args.tool_name, "## Tool Factory expects a tool name - eg --tool_name=DESeq" 985 assert (
986 args.tool_name
987 ), "## Tool Factory expects a tool name - eg --tool_name=DESeq"
1034 assert ( 988 assert (
1035 args.sysexe or args.packages 989 args.sysexe or args.packages
1036 ), "## Tool Factory wrapper expects an interpreter or an executable package" 990 ), "## Tool Factory wrapper expects an interpreter \
1037 args.input_files = [x.replace('"', "").replace("'", "") for x in args.input_files] 991 or an executable package in --sysexe or --packages"
992 args.input_files = [
993 x.replace('"', "").replace("'", "") for x in args.input_files
994 ]
1038 # remove quotes we need to deal with spaces in CL params 995 # remove quotes we need to deal with spaces in CL params
1039 for i, x in enumerate(args.additional_parameters): 996 for i, x in enumerate(args.additional_parameters):
1040 args.additional_parameters[i] = args.additional_parameters[i].replace('"', "") 997 args.additional_parameters[i] = args.additional_parameters[i].replace(
998 '"', ""
999 )
1041 r = ScriptRunner(args) 1000 r = ScriptRunner(args)
1042 r.writeShedyml() 1001 r.writeShedyml()
1043 r.makeTool() 1002 r.makeTool()
1044 if args.make_Tool == "generate": 1003 if args.make_Tool == "generate":
1045 retcode = r.run() # for testing toolfactory itself 1004 retcode = r.run()
1046 r.moveRunOutputs() 1005 r.moveRunOutputs()
1047 r.makeToolTar() 1006 r.makeToolTar()
1048 else: 1007 else:
1049 r.planemo_biodocker_test() # test to make outputs and then test 1008 retcode = r.planemo_test(genoutputs=True) # this fails :( - see PR
1050 r.moveRunOutputs() 1009 r.moveRunOutputs()
1051 r.makeToolTar() 1010 r.makeToolTar()
1011 retcode = r.planemo_test(genoutputs=False)
1012 r.moveRunOutputs()
1013 r.makeToolTar()
1014 print(f"second planemo_test returned {retcode}")
1052 if args.make_Tool == "gentestinstall": 1015 if args.make_Tool == "gentestinstall":
1053 r.shedLoad() 1016 r.shedLoad()
1054 r.eph_galaxy_load() 1017 r.eph_galaxy_load()
1055 1018
1056 1019