comparison toolfactory_docker/rgToolFactory2.py @ 2:a5c5652823a6 draft

Uploaded
author fubar
date Tue, 05 Jan 2021 00:35:40 +0000
parents
children 12331d91c4ad
comparison
equal deleted inserted replaced
1:0778fb523693 2:a5c5652823a6
1 # replace with shebang for biocontainer
2 # see https://github.com/fubar2/toolfactory
3 #
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
5 #
6 # all rights reserved
7 # Licensed under the LGPL
8 # suggestions for improvement and bug fixes welcome at
9 # https://github.com/fubar2/toolfactory
10 #
11 # July 2020: BCC was fun and I feel like rip van winkle after 5 years.
12 # Decided to
13 # 1. Fix the toolfactory so it works - done for simplest case
14 # 2. Fix planemo so the toolfactory function works
15 # 3. Rewrite bits using galaxyxml functions where that makes sense - done
16 #
17 # uses planemo in a biodocker sort of image as a requirement
18 # otherwise planemo seems to leak dependencies back into the
19 # calling venv. Hilarity ensues.
20
21
22 # # expects tarball as first, html out as second parameter
23 # cp $1 foo.tar.gz
24 # tar -xvzf foo.tar.gz
25 # TOOLNAME=`find . -name "*.xml"`
26 # echo "*****TOOLNAME = $TOOLNAME"
27 # planemo test --test_output $2 $TOOLNAME
28
29 import argparse
30 import copy
31 import logging
32 import os
33 import re
34 import shutil
35 import subprocess
36 import sys
37 import tarfile
38 import tempfile
39 import time
40
41
42 from bioblend import ConnectionError
43 from bioblend import toolshed
44
45 import docker
46
47 import galaxyxml.tool as gxt
48 import galaxyxml.tool.parameters as gxtp
49
50 import lxml
51
52 import yaml
53
54
# Module-wide settings for the ToolFactory script runner.
myversion = "V2.1 July 2020"
verbose = True
debug = True
toolFactoryURL = "https://github.com/fubar2/toolfactory"
# Separator between the fields packed into each repeated command line option.
ourdelim = "~~~"

# Field positions within one --input_files value:
# --input_files="$intab.input_files~~~$intab.input_CL~~~
#                $intab.input_formats~~~$intab.input_label
#                ~~~$intab.input_help"
# IOCLPOS is the slot where ScriptRunner.cleanuppar appends a copy of the
# original command line value before it rewrites ICLPOS.
IPATHPOS, ICLPOS, IFMTPOS, ILABPOS, IHELPOS, IOCLPOS = 0, 1, 2, 3, 4, 5

# Field positions within one --output_files value:
# --output_files "$otab.history_name~~~$otab.history_format~~~
#                 $otab.history_CL~~~$otab.history_test"
# OOCLPOS is the appended copy of the original command line value.
ONAMEPOS, OFMTPOS, OCLPOS, OTESTPOS, OOCLPOS = 0, 1, 2, 3, 4


# Field positions within one --additional_parameters value:
# --additional_parameters="$i.param_name~~~$i.param_value~~~
#                          $i.param_label~~~$i.param_help~~~$i.param_type
#                          ~~~$i.CL~~~$i.param_CLoverride"
# AOCLPOS is the appended copy of the original command line value.
ANAMEPOS, AVALPOS, ALABPOS, AHELPPOS = 0, 1, 2, 3
ATYPEPOS, ACLPOS, AOVERPOS, AOCLPOS = 4, 5, 6, 7
91
92
# Reference lxml once so linters do not report the module import as unused.
foo = len(lxml.__version__)
# Placeholder executable name given to galaxyxml when the tool is created and
# removed again from the exported XML.  Needed until a galaxyxml PR/version
# bump stops it prepending the executable even with a command override.
FAKEEXE = "~~~REMOVE~~~ME~~~"
98
99
def timenow():
    """Return the current local time as a ``dd/mm/YYYY HH:MM:SS`` string."""
    now = time.localtime(time.time())
    return time.strftime("%d/%m/%Y %H:%M:%S", now)
103
104
def quote_non_numeric(s):
    """Return ``s`` unchanged when it parses as a float, else double-quoted.

    Intended for passing parameters to interpreters such as perl or Rscript,
    where non-numeric values need to be quoted.
    """
    try:
        float(s)
    except ValueError:
        return '"%s"' % s
    return s
114
115
# Single character -> entity map used by html_escape.  The "&" and ">" values
# must be real entities ("&amp;" / "&gt;"), the same ones html_unescape
# reverses; mapping a character to itself makes the escape a silent no-op.
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "#": "&#35;",
    "$": "&#36;",
}
# Characters that Cheetah treats as syntax and their backslash-escaped form.
cheetah_escape_table = {"$": "\\$", "#": "\\#"}
124
125
def html_escape(text):
    """Return ``text`` with every character listed in ``html_escape_table``
    replaced by its entity; all other characters are passed through."""
    escaped = (html_escape_table.get(char, char) for char in text)
    return "".join(escaped)
129
130
def cheetah_escape(text):
    """Return ``text`` with every character listed in ``cheetah_escape_table``
    replaced by its backslash-escaped form; other characters are unchanged."""
    escaped = (cheetah_escape_table.get(char, char) for char in text)
    return "".join(escaped)
134
135
def html_unescape(text):
    """Revert the entities and Cheetah escapes produced for tool text.

    The targets are multi-character, so ``str.replace`` is used for each one.
    ``&amp;`` is deliberately handled last: replacing it first would turn an
    escaped entity such as ``&amp;lt;`` into ``&lt;`` and then into ``<``,
    unescaping the text twice.

    Args:
        text: string that may contain ``&amp;``, ``&gt;``, ``&lt;``,
            ``&#36;``, ``&#35;`` or a backslash-escaped ``$``.

    Returns:
        The string with one level of escaping removed.
    """
    replacements = (
        ("&gt;", ">"),
        ("&lt;", "<"),
        ("\\$", "$"),
        ("&#36;", "$"),
        ("&#35;", "#"),
        ("&amp;", "&"),  # must stay last, see docstring
    )
    for escaped, plain in replacements:
        text = text.replace(escaped, plain)
    return text
145
146
def parse_citations(citations_text):
    """Split a citation blob into ``(kind, text)`` tuples.

    Entries are separated by the literal marker ``**ENTRY**``; blank entries
    are dropped.  An entry starting with ``doi`` yields kind ``"doi"``, any
    other entry is treated as ``"bibtex"``.  In both cases the leading
    ``len(kind)`` characters are removed and the remainder is stripped.
    """
    citation_tuples = []
    for entry in citations_text.split("**ENTRY**"):
        if not entry.strip():
            continue
        kind = "doi" if entry.startswith("doi") else "bibtex"
        citation_tuples.append((kind, entry[len(kind):].strip()))
    return citation_tuples
157
158
class Error(Exception):
    """Base class for the exceptions raised by this module."""
163
164
class ScriptRunner:
    """Build, run and package a Galaxy tool wrapper for an arbitrary script.

    The constructor turns the parsed arguments into two parallel command
    lines: ``self.cl`` holds real paths and values and is used to run the
    script locally, ``self.xmlcl`` holds Cheetah ``$name`` placeholders and
    becomes the command of the generated tool XML.  The other methods write
    the galaxyxml tool document, exercise it (directly, or with planemo
    inside a docker container), collect the outputs and build the tarball.
    """

    def __init__(self, args=None):
        """Prepare the command lines and the galaxyxml tool skeleton.

        Args:
            args: parsed ``argparse`` namespace as built by ``main``.  It is
                required in practice; the ``None`` default is kept only so
                the signature stays unchanged.

        Side effects: creates ``./tfout``, ``./tfout/test-data`` and
        ``./TF_run_report_tempdir`` when missing and, when a script path is
        given, writes the script copies made by ``prepScript``.
        """
        self.ourcwd = os.getcwd()
        self.ourenv = copy.deepcopy(os.environ)
        self.infiles = [x.split(ourdelim) for x in args.input_files]
        self.outfiles = [x.split(ourdelim) for x in args.output_files]
        self.addpar = [x.split(ourdelim) for x in args.additional_parameters]
        self.args = args
        self.cleanuppar()
        self.lastclredirect = None
        self.lastxclredirect = None
        self.cl = []
        self.xmlcl = []
        self.is_positional = self.args.parampass == "positional"
        if self.args.sysexe:
            self.executeme = self.args.sysexe
        elif self.args.packages:
            # first listed package, without any ":version" suffix
            self.executeme = self.args.packages.split(",")[0].split(":")[0].strip()
        else:
            self.executeme = None
        aCL = self.cl.append
        aXCL = self.xmlcl.append
        assert args.parampass in [
            "0",
            "argparse",
            "positional",
        ], 'args.parampass must be "0","positional" or "argparse"'
        self.tool_name = re.sub("[^a-zA-Z0-9_]+", "", args.tool_name)
        self.tool_id = self.tool_name
        self.newtool = gxt.Tool(
            self.tool_name,
            self.tool_id,
            self.args.tool_version,
            self.args.tool_desc,
            FAKEEXE,
        )
        self.newtarpath = "toolfactory_%s.tgz" % self.tool_name
        self.tooloutdir = "./tfout"
        self.repdir = "./TF_run_report_tempdir"
        self.testdir = os.path.join(self.tooloutdir, "test-data")
        for needed in (self.tooloutdir, self.testdir, self.repdir):
            if not os.path.exists(needed):
                os.mkdir(needed)
        self.tinputs = gxtp.Inputs()
        self.toutputs = gxtp.Outputs()
        self.testparam = []
        if self.args.script_path:
            self.prepScript()
        if self.args.command_override:
            with open(self.args.command_override, "r") as scos:
                self.command_override = [x.rstrip() for x in scos]
        else:
            self.command_override = None
        if self.args.test_override:
            with open(self.args.test_override, "r") as stos:
                self.test_override = [x.rstrip() for x in stos]
        else:
            self.test_override = None
        if self.args.cl_prefix:  # DIY CL start
            for c in self.args.cl_prefix.split(" "):
                aCL(c)
                aXCL(c)
        elif self.args.script_path:
            aCL(self.executeme)
            aCL(self.sfile)
            aXCL(self.executeme)
            aXCL("$runme")  # name of the configfile added in makeXML
        else:
            aCL(self.executeme)
            aXCL(self.executeme)
        self.elog = os.path.join(self.repdir, "%s_error_log.txt" % self.tool_name)
        self.tlog = os.path.join(self.repdir, "%s_runner_log.txt" % self.tool_name)

        if self.args.parampass == "0":
            self.clsimple()
            return
        # Each suffix entry is [sort key, CL name, value, CL override]; the
        # lists are sorted so the sort key decides the command line order.
        clsuffix = []
        xclsuffix = []
        for p in self.infiles:
            if p[IOCLPOS].upper() == "STDIN":
                appendme = [
                    p[ICLPOS],
                    p[ICLPOS],
                    p[IPATHPOS],
                    "< %s" % p[IPATHPOS],
                ]
                xappendme = [
                    p[ICLPOS],
                    p[ICLPOS],
                    p[IPATHPOS],
                    "< $%s" % p[ICLPOS],
                ]
            else:
                appendme = [p[IOCLPOS], p[ICLPOS], p[IPATHPOS], ""]
                xappendme = [p[IOCLPOS], p[ICLPOS], "$%s" % p[ICLPOS], ""]
            clsuffix.append(appendme)
            xclsuffix.append(xappendme)
        for p in self.outfiles:
            # compared the same case-insensitive way as cleanuppar and
            # doXMLparam so "stdout" is treated as a redirect everywhere
            if p[OOCLPOS].strip().upper() == "STDOUT":
                self.lastclredirect = [">", p[ONAMEPOS]]
                self.lastxclredirect = [">", "$%s" % p[OCLPOS]]
            else:
                clsuffix.append([p[OCLPOS], p[ONAMEPOS], p[ONAMEPOS], ""])
                xclsuffix.append([p[OCLPOS], p[ONAMEPOS], "$%s" % p[ONAMEPOS], ""])
        for p in self.addpar:
            clsuffix.append([p[AOCLPOS], p[ACLPOS], p[AVALPOS], p[AOVERPOS]])
            xclsuffix.append(
                [p[AOCLPOS], p[ACLPOS], '"$%s"' % p[ANAMEPOS], p[AOVERPOS]]
            )
        clsuffix.sort()
        xclsuffix.sort()
        self.xclsuffix = xclsuffix
        self.clsuffix = clsuffix
        if self.args.parampass == "positional":
            self.clpositional()
        else:
            self.clargparse()

    def prepScript(self):
        """Read the user script and write the copies the tool needs.

        Sets ``self.script`` (the text), ``self.sfile`` (a temporary file
        holding the script, used on the local command line),
        ``self.escapedScript`` (Cheetah-escaped lines for the tool
        configfile) and ``self.spacedScript`` (non-blank lines prefixed with
        a space for the help text).  Also writes the escaped script to
        ``<tool_name>.<executable>`` in the current directory.

        Raises:
            AssertionError: if the script contains no non-blank line.
        """
        with open(self.args.script_path, "r") as source:
            rx = [x.rstrip() for x in source]
        assert any(x.strip() for x in rx), "Supplied script is empty. Cannot run"
        self.script = "\n".join(rx)
        fhandle, self.sfile = tempfile.mkstemp(
            prefix=self.tool_name, suffix="_%s" % (self.executeme)
        )
        # write through the descriptor mkstemp returned so it is not leaked
        with os.fdopen(fhandle, "w") as tscript:
            tscript.write(self.script)
        self.escapedScript = [cheetah_escape(x) for x in rx]
        self.spacedScript = [f" {x}" for x in rx if x.strip() > ""]
        art = "%s.%s" % (self.tool_name, self.executeme)
        with open(art, "wb") as artifact:
            artifact.write(bytes("\n".join(self.escapedScript), "utf8"))

    def cleanuppar(self):
        """Validate and normalise the input, output and parameter rows.

        Positional parameters are complicated by their numeric ordinal: a
        bare number cannot be used as a Galaxy parameter name.  Every row
        gets a copy of its original command line value appended (IOCLPOS /
        OOCLPOS / AOCLPOS) and numeric values are replaced by generated
        names (``inputN``, the output name, ``paramN``).

        Raises:
            AssertionError: in positional mode, when a command line value is
                not an integer (or STDIN / STDOUT for inputs / outputs).
        """
        if self.args.parampass == "positional":
            for p in self.infiles:
                assert (
                    p[ICLPOS].isdigit() or p[ICLPOS].strip().upper() == "STDIN"
                ), "Positional parameters must be ordinal integers - got %s for %s" % (
                    p[ICLPOS],
                    p[ILABPOS],
                )
            for p in self.outfiles:
                assert (
                    p[OCLPOS].isdigit() or p[OCLPOS].strip().upper() == "STDOUT"
                ), "Positional parameters must be ordinal integers - got %s for %s" % (
                    p[OCLPOS],
                    p[ONAMEPOS],
                )
            for p in self.addpar:
                assert p[
                    ACLPOS
                ].isdigit(), "Positional parameters must be ordinal integers - got %s for %s" % (
                    p[ACLPOS],
                    p[ANAMEPOS],
                )
        for i, p in enumerate(self.infiles):
            infp = copy.copy(p)
            infp.append(infp[ICLPOS])  # keep copy of the original value
            # NOTE(review): "STDOUT" is tested here although the positional
            # check above accepts "STDIN" for inputs - possibly a copy/paste
            # slip; left unchanged because it decides the parameter name.
            if (
                infp[ICLPOS].isdigit()
                or self.args.parampass == "0"
                or infp[ICLPOS].strip().upper() == "STDOUT"
            ):
                infp[ICLPOS] = "input%d" % (i + 1)
            self.infiles[i] = infp
        for i, p in enumerate(self.outfiles):
            p.append(p[OCLPOS])  # keep copy
            if (p[OOCLPOS].isdigit() and self.args.parampass != "positional") or p[
                OOCLPOS
            ].strip().upper() == "STDOUT":
                p[OCLPOS] = p[ONAMEPOS]
            self.outfiles[i] = p
        for i, p in enumerate(self.addpar):
            p.append(p[ACLPOS])  # keep copy
            if p[ACLPOS].isdigit():
                p[ACLPOS] = "param%s" % p[ACLPOS]
            self.addpar[i] = p

    def clsimple(self):
        """Extend both command lines for the no-parameter case.

        Uses ``<`` and ``>`` for i/o: the first input is redirected in and
        the first output redirected out.
        """
        aCL = self.cl.append
        aXCL = self.xmlcl.append

        if len(self.infiles) > 0:
            aCL("<")
            aCL(self.infiles[0][IPATHPOS])
            aXCL("<")
            aXCL("$%s" % self.infiles[0][ICLPOS])
        if len(self.outfiles) > 0:
            aCL(">")
            aCL(self.outfiles[0][OCLPOS])
            aXCL(">")
            aXCL("$%s" % self.outfiles[0][ONAMEPOS])

    def clpositional(self):
        """Extend both command lines with the sorted values only.

        Inputs come in order, then params.  The XML command additionally
        gets the stdout redirect recorded in ``self.lastxclredirect``; the
        local command line does not, because ``run`` redirects stdout
        itself.
        """
        for (o_v, k, v, koverride) in self.clsuffix:
            self.cl.append(v)
        for (o_v, k, v, koverride) in self.xclsuffix:
            self.xmlcl.append(v)
        if self.lastxclredirect:
            self.xmlcl.append(self.lastxclredirect[0])
            self.xmlcl.append(self.lastxclredirect[1])

    def clargparse(self):
        """Extend both command lines in argparse style.

        Each entry contributes its option followed by its value.  The option
        is the override when one is given, otherwise the name prefixed with
        ``-`` (single character names) or ``--``.
        """
        # inputs then params in argparse named form
        for target, suffix in ((self.xmlcl, self.xclsuffix), (self.cl, self.clsuffix)):
            for (o_v, k, v, koverride) in suffix:
                if koverride > "":
                    k = koverride
                elif len(k.strip()) == 1:
                    k = "-%s" % k
                else:
                    k = "--%s" % k
                target.append(k)
                target.append(v)

    def getNdash(self, newname):
        """Return the number of leading dashes for parameter ``newname``.

        Positional tools use 0; otherwise 1 for names shorter than two
        characters and 2 for longer names.
        """
        if self.is_positional:
            return 0
        if len(newname) < 2:
            return 1
        return 2

    def doXMLparam(self):
        """Create the galaxyxml inputs, outputs and test parameters.

        Used for the ``positional`` and ``argparse`` modes.  Fills
        ``self.toutputs``, ``self.tinputs`` and ``self.testparam`` from the
        normalised output, input and additional parameter rows.

        Raises:
            ValueError: for an additional parameter whose type is not
                ``text``, ``integer`` or ``float``, or for a non-numeric
                ``sim_size`` limit.
        """
        # --output_files "$otab.history_name~~~$otab.history_format~~~
        #                 $otab.history_CL~~~$otab.history_test"
        for p in self.outfiles:
            newname, newfmt, newcl, test, oldcl = p
            test = test.strip()
            ndash = self.getNdash(newcl)
            aparm = gxtp.OutputData(
                name=newname, format=newfmt, num_dashes=ndash, label=newcl
            )
            aparm.positional = self.is_positional
            if self.is_positional:
                if oldcl.upper() == "STDOUT":
                    aparm.positional = 9999999  # sorts the redirect last
                    aparm.command_line_override = "> $%s" % newname
                else:
                    aparm.positional = int(oldcl)
                    aparm.command_line_override = "$%s" % newname
            self.toutputs.append(aparm)
            # test is "<compare>" or "<compare>:<limit>"
            testparts = test.split(":")
            limit = testparts[1].strip() if len(testparts) > 1 else ""
            if test.startswith("diff"):
                # a missing or non-numeric limit means no differing lines
                ld = int(limit) if limit.isdigit() else 0
                self.testparam.append(
                    gxtp.TestOutput(
                        name=newname,
                        value="%s_sample" % newname,
                        format=newfmt,
                        compare="diff",
                        lines_diff=ld,
                    )
                )
            elif test.startswith("sim_size"):
                # both stay None when no limit was supplied
                delta = None
                delta_frac = None
                if limit > "":
                    if "." in limit:
                        delta_frac = min(1.0, float(limit))
                    else:
                        delta = int(limit)
                self.testparam.append(
                    gxtp.TestOutput(
                        name=newname,
                        value="%s_sample" % newname,
                        format=newfmt,
                        compare="sim_size",
                        delta=delta,
                        delta_frac=delta_frac,
                    )
                )
            # any other test specification adds no output assertion
        for p in self.infiles:
            newname = p[ICLPOS]
            newfmt = p[IFMTPOS]
            ndash = self.getNdash(newname)
            alab = p[ILABPOS] if len(p[ILABPOS]) > 0 else p[ICLPOS]
            aninput = gxtp.DataParam(
                newname,
                optional=False,
                label=alab,
                help=p[IHELPOS],
                format=newfmt,
                multiple=False,
                num_dashes=ndash,
            )
            aninput.positional = self.is_positional
            self.tinputs.append(aninput)
            tparm = gxtp.TestParam(name=newname, value="%s_sample" % newname)
            self.testparam.append(tparm)
        param_classes = {
            "text": gxtp.TextParam,
            "integer": gxtp.IntegerParam,
            "float": gxtp.FloatParam,
        }
        for p in self.addpar:
            (
                newname,
                newval,
                newlabel,
                newhelp,
                newtype,
                newcl,
                override,
                oldcl,
            ) = p
            if not len(newlabel) > 0:
                newlabel = newname
            ndash = self.getNdash(newname)
            if newtype not in param_classes:
                raise ValueError(
                    'Unrecognised parameter type "%s" for additional parameter %s in makeXML'
                    % (newtype, newname)
                )
            # every type uses the user supplied label (defaulted to the name
            # above); integer and float used to ignore it
            aparm = param_classes[newtype](
                newname,
                label=newlabel,
                help=newhelp,
                value=newval,
                num_dashes=ndash,
            )
            aparm.positional = self.is_positional
            if self.is_positional:
                aparm.positional = int(oldcl)
            self.tinputs.append(aparm)
            tparm = gxtp.TestParam(newname, value=newval)
            self.testparam.append(tparm)

    def doNoXMLparam(self):
        """Create the galaxyxml input and output for a filter style tool.

        Used when parampass is ``0``: the single input is fed on stdin and
        the first output is taken from stdout.

        Raises:
            AssertionError: if more than one input file was supplied.
        """
        if len(self.infiles) > 0:
            alab = self.infiles[0][ILABPOS]
            if len(alab) == 0:
                alab = self.infiles[0][ICLPOS]
            max1s = (
                "Maximum one input if parampass is 0 but multiple input files supplied - %s"
                % str(self.infiles)
            )
            assert len(self.infiles) == 1, max1s
            newname = self.infiles[0][ICLPOS]
            aninput = gxtp.DataParam(
                newname,
                optional=False,
                label=alab,
                help=self.infiles[0][IHELPOS],
                format=self.infiles[0][IFMTPOS],
                multiple=False,
                num_dashes=0,
            )
            aninput.command_line_override = "< $%s" % newname
            aninput.positional = self.is_positional
            self.tinputs.append(aninput)
            tp = gxtp.TestParam(name=newname, value="%s_sample" % newname)
            self.testparam.append(tp)
        if len(self.outfiles) > 0:
            newname = self.outfiles[0][OCLPOS]
            newfmt = self.outfiles[0][OFMTPOS]
            anout = gxtp.OutputData(newname, format=newfmt, num_dashes=0)
            anout.command_line_override = "> $%s" % newname
            anout.positional = self.is_positional
            self.toutputs.append(anout)
            tp = gxtp.TestOutput(
                name=newname, value="%s_sample" % newname, format=newfmt
            )
            self.testparam.append(tp)

    def makeXML(self):
        """Create the Galaxy XML tool wrapper and write ``<tool_name>.xml``.

        Assembles command, help, requirements, inputs, outputs, configfile
        and tests on ``self.newtool`` (galaxyxml), exports it, removes the
        placeholder executable and, when a test override was supplied,
        replaces the generated ``<tests>`` section with the override text.
        The file is written to the current directory.
        """
        if self.command_override:
            self.newtool.command_override = self.command_override  # config file
        else:
            self.newtool.command_override = self.xmlcl
        if self.args.help_text:
            with open(self.args.help_text, "r") as helpfile:
                helptext = helpfile.readlines()
            safertext = "\n".join([cheetah_escape(x) for x in helptext])
            if self.args.script_path:
                scr = [x for x in self.spacedScript if x.strip() > ""]
                scr.insert(0, "\n------\n\n\nScript::\n")
                if len(scr) > 300:
                    # keep the help readable for very long scripts
                    scr = (
                        scr[:100]
                        + [" >300 lines - stuff deleted", " ......"]
                        + scr[-100:]
                    )
                scr.append("\n")
                safertext = safertext + "\n".join(scr)
            self.newtool.help = safertext
        else:
            self.newtool.help = (
                "Please ask the tool author (%s) for help as none was supplied at tool generation\n"
                % (self.args.user_email)
            )
        self.newtool.version_command = None  # do not want
        requirements = gxtp.Requirements()
        if self.args.packages:
            for d in self.args.packages.split(","):
                ver = ""
                # accept "name==ver", "name=ver" and "name:ver"
                d = d.replace("==", ":")
                d = d.replace("=", ":")
                if ":" in d:
                    packg, ver = d.split(":")
                else:
                    packg = d
                requirements.append(
                    gxtp.Requirement("package", packg.strip(), ver.strip())
                )
        self.newtool.requirements = requirements
        if self.args.parampass == "0":
            self.doNoXMLparam()
        else:
            self.doXMLparam()
        self.newtool.outputs = self.toutputs
        self.newtool.inputs = self.tinputs
        if self.args.script_path:
            configfiles = gxtp.Configfiles()
            configfiles.append(
                gxtp.Configfile(name="runme", text="\n".join(self.escapedScript))
            )
            self.newtool.configfiles = configfiles
        tests = gxtp.Tests()
        test_a = gxtp.Test()
        for tp in self.testparam:
            test_a.append(tp)
        tests.append(test_a)
        self.newtool.tests = tests
        self.newtool.add_comment(
            "Created by %s at %s using the Galaxy Tool Factory."
            % (self.args.user_email, timenow())
        )
        self.newtool.add_comment("Source in git at: %s" % (toolFactoryURL))
        self.newtool.add_comment(
            "Cite: Creating re-usable tools from scripts doi:10.1093/bioinformatics/bts573"
        )
        exml0 = self.newtool.export()
        exml = exml0.replace(FAKEEXE, "")  # temporary work around until PR accepted
        if self.test_override:
            # cannot do this inside galaxyxml as it expects lxml objects for
            # tests.  test_override is a list of lines, so it is joined
            # before being spliced in; formatting the list itself would
            # write its Python repr into the XML.
            part1 = exml.split("<tests>")[0]
            part2 = exml.split("</tests>")[1]
            exml = "%s\n%s\n%s" % (part1, "\n".join(self.test_override), part2)
        with open("%s.xml" % self.tool_name, "w") as xf:
            xf.write(exml)
            xf.write("\n")
        # ready for the tarball

    def run(self):
        """Generate test outputs by running the local command line.

        Won't work if a command or test override is in play - planemo is the
        easiest way to generate test outputs for that case.

        With parameters, stderr is appended to the error log and stdout goes
        to the STDOUT output file when one was declared, otherwise it is
        appended to the runner log.  In the no-parameter case the first
        input is connected to stdin and the first output to stdout.  Empty
        log files are removed afterwards.

        Returns:
            The exit code of the command.
        """
        scl = " ".join(self.cl)
        logline = "## Executing Toolfactory generated command line = %s\n" % scl
        if self.args.parampass != "0":
            with open(self.elog, "a") as ste:
                if self.lastclredirect:
                    # second element is the name of an output file
                    with open(self.lastclredirect[1], "wb") as sto:
                        subp = subprocess.run(
                            self.cl,
                            env=self.ourenv,
                            shell=False,
                            stdout=sto,
                            stderr=ste,
                        )
                else:
                    with open(self.tlog, "a") as sto:
                        sto.write(logline)
                        sto.flush()  # keep our line ahead of the child output
                        subp = subprocess.run(
                            self.cl,
                            env=self.ourenv,
                            shell=False,
                            stdout=sto,
                            stderr=ste,
                        )
        else:  # work around special case - stdin and write to stdout
            # The command line goes to the runner log: the output is opened
            # in binary mode and holds the tool's data, so it is not a place
            # for log text.
            with open(self.tlog, "a") as tlog:
                tlog.write(logline)
            # NOTE(review): in this mode self.cl also holds the "<" / ">"
            # tokens added by clsimple; with shell=False they reach the
            # program as plain arguments - confirm that is intended.
            sti = None
            sto = None
            try:
                if len(self.infiles) > 0:
                    sti = open(self.infiles[0][IPATHPOS], "rb")
                if len(self.outfiles) > 0:
                    sto = open(self.outfiles[0][ONAMEPOS], "wb")
                # None makes the child inherit our own stdin / stdout, which
                # are therefore never closed here
                subp = subprocess.run(
                    self.cl, env=self.ourenv, shell=False, stdout=sto, stdin=sti
                )
            finally:
                for handle in (sti, sto):
                    if handle is not None:
                        handle.close()
        retval = subp.returncode
        for logpath in (self.tlog, self.elog):
            if os.path.isfile(logpath) and os.stat(logpath).st_size == 0:
                os.unlink(logpath)
        logging.debug("run done")
        return retval

    def copy_to_container(self, src, dest, container):
        """Copy the ``src`` directory tree into ``dest`` inside a container.

        The tree is packed into a temporary tar archive whose member names
        start at the last component of ``src`` and handed to
        ``container.put_archive``.

        Args:
            src: path of the local file or directory to copy.
            dest: directory inside the container receiving the archive.
            container: object providing ``put_archive(path, data)``.
        """
        fd, tfname = tempfile.mkstemp(suffix=".tar")
        os.close(fd)  # only the name is needed; do not leak the descriptor
        try:
            # arcname gives the same member names as adding the basename from
            # inside the parent directory, without changing the process cwd
            with tarfile.open(tfname, mode="w") as tar:
                tar.add(src, arcname=os.path.basename(src))
            with open(tfname, "rb") as tarball:
                data = tarball.read()
            container.put_archive(dest, data)
        finally:
            os.unlink(tfname)

    def copy_from_container(self, src, dest, container):
        """Recreate the container's ``src`` tree under local ``dest``.

        Args:
            src: path inside the container.
            dest: local directory, created when missing.
            container: object providing ``get_archive(path)`` returning an
                iterable of tar chunks and a stat value.
        """
        os.makedirs(dest, exist_ok=True)
        fd, tfname = tempfile.mkstemp(suffix=".tar")
        try:
            with os.fdopen(fd, "wb") as tf:
                bits, stat = container.get_archive(src)
                for chunk in bits:
                    tf.write(chunk)
            with tarfile.open(tfname, "r") as tar:
                tar.extractall(dest)
        finally:
            os.unlink(tfname)

    def planemo_biodocker_test(self):
        """Run planemo twice in a biocontainer and collect the results.

        planemo currently leaks dependencies if used in the same container
        and gets unhappy after a first successful run, see
        https://github.com/galaxyproject/planemo/issues/1078#issuecomment-731476930
        The docker biocontainer has planemo with caches filled to save
        repeated downloads.

        The first planemo call updates the test data, the second one writes
        the html report.  The container's work directory is then copied into
        the current directory and the report into the report directory.  The
        container and its volume are removed even when a step fails.
        """

        def prun(container, tout, cl, user="biodocker"):
            """Run ``cl`` in the container and append the result to the log."""
            rlog = container.exec_run(cl, user=user)
            slogl = str(rlog).split("\\n")
            slog = "\n".join(slogl)
            tout.write(f"## got rlog {slog} from {cl}\n")

        planemoimage = "quay.io/fubar2/planemo-biocontainer"
        xreal = "%s.xml" % self.tool_name
        repname = f"{self.tool_name}_planemo_test_report.html"
        ptestrep_path = os.path.join(self.repdir, repname)
        destdir = "/toolfactory/ptest"
        imrep = os.path.join(destdir, repname)
        ptestpath = os.path.join(destdir, "tfout", xreal)
        # docker API failures are logged and the run continues, as the
        # handlers around the two planemo calls always intended
        docker_errors = (Error, docker.errors.APIError)
        with open(self.tlog, "a") as tout:
            client = docker.from_env()
            tvol = client.volumes.create()
            tvolname = tvol.name
            container = None
            try:
                # need to keep the container running so keep it open with
                # sleep - it is stopped and destroyed when we are done
                container = client.containers.run(
                    planemoimage,
                    "sleep 120m",
                    detach=True,
                    user="biodocker",
                    volumes={f"{tvolname}": {"bind": "/toolfactory", "mode": "rw"}},
                )
                # root is needed to prepare the volume
                prun(container, tout, f"mkdir -p {destdir}", user="root")
                prun(container, tout, f"rm -rf {destdir}/*", user="root")
                self.copy_to_container(self.tooloutdir, destdir, container)
                prun(container, tout, "chown -R biodocker /toolfactory", user="root")
                _ = container.exec_run(f"ls -la {destdir}")
                ptestcl = f"planemo test --update_test_data --no_cleanup --test_data {destdir}/tfout/test-data --galaxy_root /home/biodocker/galaxy-central {ptestpath}"
                try:
                    # fails because test outputs missing but updates the
                    # test-data directory
                    _ = container.exec_run(ptestcl)
                except docker_errors as e:
                    tout.write(f"#### error: {e} from {ptestcl}\n")
                cl = f"planemo test --test_output {imrep} --no_cleanup --test_data {destdir}/tfout/test-data --galaxy_root /home/biodocker/galaxy-central {ptestpath}"
                try:
                    prun(container, tout, cl)
                except docker_errors as e:
                    # report the command that actually failed
                    tout.write(f"#### error: {e} from {cl}\n")
                testouts = tempfile.mkdtemp(suffix=None, prefix="tftemp", dir=".")
                try:
                    self.copy_from_container(destdir, testouts, container)
                    src = os.path.join(testouts, "ptest")
                    if os.path.isdir(src):
                        shutil.copytree(src, ".", dirs_exist_ok=True)
                        if os.path.isfile(repname):
                            shutil.copyfile(repname, ptestrep_path)
                    else:
                        tout.write(f"No output from run to shutil.copytree in {src}\n")
                finally:
                    shutil.rmtree(testouts)
            finally:
                if container is not None:
                    container.stop()
                    container.remove()
                tvol.remove()

    def shedLoad(self):
        """Use bioblend to create the tool repository or update an existing one.

        A repository named after the tool is created in the tool shed when
        none exists, then the tool tarball is uploaded to it.  Progress is
        appended to the runner log; a bioblend ``ConnectionError`` during
        the upload is logged rather than raised.
        """
        with open(self.tlog, "a") as sto:
            # NOTE(review): certificate verification is disabled for the
            # tool shed connection - confirm that is acceptable.
            ts = toolshed.ToolShedInstance(
                url=self.args.toolshed_url,
                key=self.args.toolshed_api_key,
                verify=False,
            )
            repos = ts.repositories.get_repositories()
            rnames = [x.get("name", "?") for x in repos]
            rids = [x.get("id", "?") for x in repos]
            tfcat = "ToolFactory generated tools"
            if self.tool_name not in rnames:
                tscat = ts.categories.get_categories()
                cnames = [x.get("name", "?").strip() for x in tscat]
                cids = [x.get("id", "?") for x in tscat]
                catID = None
                if tfcat.strip() in cnames:
                    ci = cnames.index(tfcat)
                    catID = cids[ci]
                res = ts.repositories.create_repository(
                    name=self.args.tool_name,
                    synopsis="Synopsis:%s" % self.args.tool_desc,
                    description=self.args.tool_desc,
                    type="unrestricted",
                    remote_repository_url=self.args.toolshed_url,
                    homepage_url=None,
                    category_ids=catID,
                )
                tid = res.get("id", None)
                sto.write(
                    f"#create_repository {self.args.tool_name} tid={tid} res={res}\n"
                )
            else:
                i = rnames.index(self.tool_name)
                tid = rids[i]
            try:
                res = ts.repositories.update_repository(
                    id=tid, tar_ball_path=self.newtarpath, commit_message=None
                )
                # log the repository id, not the ``id`` builtin
                sto.write(f"#update res id {tid} ={res}\n")
            except ConnectionError:
                sto.write(
                    "####### Is the toolshed running and the API key correct? Bioblend shed upload failed\n"
                )

    def eph_galaxy_load(self):
        """Use ephemeris to install the new tool from the tool shed.

        Runs ``shed-tools install`` against the configured Galaxy and tool
        shed, appending the command and its output to the runner log.

        Returns:
            The exit code of ``shed-tools``.
        """
        cll = [
            "shed-tools",
            "install",
            "-g",
            self.args.galaxy_url,
            "--latest",
            "-a",
            self.args.galaxy_api_key,
            "--name",
            self.tool_name,
            "--owner",
            "fubar",
            "--toolshed",
            self.args.toolshed_url,
            "--section_label",
            "ToolFactory",
        ]
        with open(self.tlog, "a") as tout:
            tout.write("running\n%s\n" % " ".join(cll))
            # the child writes to the same descriptor, so flush first to
            # keep the log in order
            tout.flush()
            subp = subprocess.run(
                cll,
                env=self.ourenv,
                cwd=self.ourcwd,
                shell=False,
                stderr=tout,
                stdout=tout,
            )
            tout.write(
                "installed %s - got retcode %d\n" % (self.tool_name, subp.returncode)
            )
        return subp.returncode

    def writeShedyml(self):
        """Write the ``.shed.yml`` planemo needs into the tool directory.

        The owner is the part of the user email before the ``@``.
        """
        yuser = self.args.user_email.split("@")[0]
        yfname = os.path.join(self.tooloutdir, ".shed.yml")
        odict = {
            "name": self.tool_name,
            "owner": yuser,
            "type": "unrestricted",
            "description": self.args.tool_desc,
            "synopsis": self.args.tool_desc,
            "category": "TF Generated Tools",
        }
        with open(yfname, "w") as yamlf:
            yaml.dump(odict, yamlf, allow_unicode=True)

    def makeTool(self):
        """Write the tool XML and the input samples into place.

        Generates the XML, copies it into the tool directory and copies each
        input file to ``<name>_sample`` in the test-data directory.
        """
        self.makeXML()
        if self.args.script_path:
            # NOTE(review): self.sfile comes from tempfile.mkstemp and is an
            # absolute path, so this join returns self.sfile itself and the
            # copy below never happens - confirm whether the script was
            # meant to be copied into the tool directory.
            stname = os.path.join(self.tooloutdir, "%s" % (self.sfile))
            if not os.path.exists(stname):
                shutil.copyfile(self.sfile, stname)
        xreal = "%s.xml" % self.tool_name
        xout = os.path.join(self.tooloutdir, xreal)
        shutil.copyfile(xreal, xout)
        for p in self.infiles:
            pth = p[IPATHPOS]
            dest = os.path.join(self.testdir, "%s_sample" % p[ICLPOS])
            shutil.copyfile(pth, dest)

    def makeToolTar(self):
        """Move outputs into test-data and prepare the tarball.

        Every output found in the test-data directory without a
        ``<name>_sample`` twin is copied to that name and to
        ``<name>.sample`` in the report directory.  The tool directory is
        then archived as ``toolfactory_<tool_name>.tgz``, leaving out the
        planemo html report, and the archive is copied to ``args.new_tool``.
        """
        excludeme = "_planemo_test_report.html"

        def exclude_function(tarinfo):
            """Drop the planemo report from the archive, keep the rest."""
            filename = tarinfo.name
            return None if filename.endswith(excludeme) else tarinfo

        with open(self.tlog, "a") as tout:
            for p in self.outfiles:
                oname = p[ONAMEPOS]
                tdest = os.path.join(self.testdir, "%s_sample" % oname)
                if os.path.isfile(tdest):
                    continue
                src = os.path.join(self.testdir, oname)
                if os.path.isfile(src):
                    shutil.copyfile(src, tdest)
                    dest = os.path.join(self.repdir, "%s.sample" % (oname))
                    shutil.copyfile(src, dest)
                else:
                    tout.write(
                        "###Output file %s not found in testdir %s. This is normal during the first Planemo run that generates test outputs\n"
                        % (tdest, self.testdir)
                    )
        with tarfile.open(self.newtarpath, "w:gz") as tf:
            tf.add(
                name=self.tooloutdir, arcname=self.tool_name, filter=exclude_function
            )
        shutil.copyfile(self.newtarpath, self.args.new_tool)

    def moveRunOutputs(self):
        """Copy planemo or run outputs into the report directory.

        Files in the tool directory are copied with dots in the name turned
        into underscores and an extension added so they display as text.
        Files in the test-data directory other than samples and the planemo
        report are copied with their extension (or ``.txt``) appended.
        """
        with os.scandir(self.tooloutdir) as outs:
            for entry in outs:
                if not entry.is_file():
                    continue
                if "." in entry.name:
                    nayme, ext = os.path.splitext(entry.name)
                    if ext in [".yml", ".xml", ".json", ".yaml"]:
                        ext = f"{ext}.txt"
                else:
                    ext = ".txt"
                ofn = "%s%s" % (entry.name.replace(".", "_"), ext)
                dest = os.path.join(self.repdir, ofn)
                src = os.path.join(self.tooloutdir, entry.name)
                shutil.copyfile(src, dest)
        with os.scandir(self.testdir) as outs:
            for entry in outs:
                if (
                    (not entry.is_file())
                    or entry.name.endswith("_sample")
                    or entry.name.endswith("_planemo_test_report.html")
                ):
                    continue
                if "." in entry.name:
                    nayme, ext = os.path.splitext(entry.name)
                else:
                    ext = ".txt"
                newname = f"{entry.name}{ext}"
                dest = os.path.join(self.repdir, newname)
                src = os.path.join(self.testdir, entry.name)
                shutil.copyfile(src, dest)
1027
1028
def main():
    """Parse the command line and drive a ScriptRunner.

    This is a Galaxy wrapper. It expects to be called by a special purpose
    tool.xml.  After validating the arguments it writes the shed yml and the
    tool, then either runs the command directly (``--make_Tool generate``)
    or tests it with planemo in docker, packages the results and, for
    ``gentestinstall``, uploads the tool to the tool shed and installs it
    into Galaxy.
    """
    parser = argparse.ArgumentParser()
    # options that only take a value and a default
    simple_options = (
        ("--script_path", None),
        ("--history_test", None),
        ("--cl_prefix", None),
        ("--sysexe", None),
        ("--packages", None),
        ("--tool_name", "newtool"),
        ("--tool_dir", None),
        ("--user_email", "Unknown"),
        ("--bad_user", None),
        ("--make_Tool", "runonly"),
        ("--help_text", None),
        ("--tool_desc", None),
        ("--tool_version", None),
        ("--citations", None),
        ("--command_override", None),
        ("--test_override", None),
        ("--parampass", "positional"),
        ("--tfout", "./tfout"),
        ("--new_tool", "new_tool"),
        ("--galaxy_url", "http://localhost:8080"),
        # make sure this is identical to tool_sheds_conf.xml
        # localhost != 127.0.0.1 so validation fails
        ("--toolshed_url", "http://localhost:9009"),
        ("--toolshed_api_key", "fakekey"),
        ("--galaxy_api_key", "fakekey"),
        ("--galaxy_root", "/galaxy-central"),
        ("--galaxy_venv", "/galaxy_venv"),
    )
    for flag, default in simple_options:
        parser.add_argument(flag, default=default)
    # repeatable options, one packed row per occurrence
    for flag in ("--input_files", "--output_files", "--additional_parameters"):
        parser.add_argument(flag, default=[], action="append")
    parser.add_argument(
        "--edit_additional_parameters", action="store_true", default=False
    )
    args = parser.parse_args()
    assert not args.bad_user, (
        'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to "admin_users" in the galaxy.yml Galaxy configuration file'
        % (args.bad_user, args.bad_user)
    )
    assert args.tool_name, "## Tool Factory expects a tool name - eg --tool_name=DESeq"
    assert (
        args.sysexe or args.packages
    ), "## Tool Factory wrapper expects an interpreter or an executable package"
    # remove quotes we need to deal with spaces in CL params
    args.input_files = [
        row.replace('"', "").replace("'", "") for row in args.input_files
    ]
    args.additional_parameters = [
        row.replace('"', "") for row in args.additional_parameters
    ]
    runner = ScriptRunner(args)
    runner.writeShedyml()
    runner.makeTool()
    if args.make_Tool == "generate":
        runner.run()  # for testing toolfactory itself
    else:
        runner.planemo_biodocker_test()  # test to make outputs and then test
    runner.moveRunOutputs()
    runner.makeToolTar()
    if args.make_Tool == "gentestinstall":
        runner.shedLoad()
        runner.eph_galaxy_load()


if __name__ == "__main__":
    main()