Mercurial > repos > fubar > tool_factory_docker
comparison toolfactory_docker/rgToolFactory2.py @ 2:a5c5652823a6 draft
Uploaded
author | fubar |
---|---|
date | Tue, 05 Jan 2021 00:35:40 +0000 |
parents | |
children | 12331d91c4ad |
comparison
equal
deleted
inserted
replaced
1:0778fb523693 | 2:a5c5652823a6 |
---|---|
1 # replace with shebang for biocontainer | |
2 # see https://github.com/fubar2/toolfactory | |
3 # | |
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012 | |
5 # | |
6 # all rights reserved | |
7 # Licensed under the LGPL | |
8 # suggestions for improvement and bug fixes welcome at | |
9 # https://github.com/fubar2/toolfactory | |
10 # | |
11 # July 2020: BCC was fun and I feel like rip van winkle after 5 years. | |
12 # Decided to | |
13 # 1. Fix the toolfactory so it works - done for simplest case | |
14 # 2. Fix planemo so the toolfactory function works | |
15 # 3. Rewrite bits using galaxyxml functions where that makes sense - done | |
16 # | |
17 # uses planemo in a biodocker sort of image as a requirement | |
18 # otherwise planemo seems to leak dependencies back into the | |
19 # calling venv. Hilarity ensues. | |
20 | |
21 | |
22 # # expects tarball as first, html out as second parameter | |
23 # cp $1 foo.tar.gz | |
24 # tar -xvzf foo.tar.gz | |
25 # TOOLNAME=`find . -name "*.xml"` | |
26 # echo "*****TOOLNAME = $TOOLNAME" | |
27 # planemo test --test_output $2 $TOOLNAME | |
28 | |
29 import argparse | |
30 import copy | |
31 import logging | |
32 import os | |
33 import re | |
34 import shutil | |
35 import subprocess | |
36 import sys | |
37 import tarfile | |
38 import tempfile | |
39 import time | |
40 | |
41 | |
42 from bioblend import ConnectionError | |
43 from bioblend import toolshed | |
44 | |
45 import docker | |
46 | |
47 import galaxyxml.tool as gxt | |
48 import galaxyxml.tool.parameters as gxtp | |
49 | |
50 import lxml | |
51 | |
52 import yaml | |
53 | |
54 | |
# Version banner and module-wide switches.
myversion = "V2.1 July 2020"
verbose = True
debug = True
toolFactoryURL = "https://github.com/fubar2/toolfactory"
# Separator between the fields packed into each repeated command line value.
ourdelim = "~~~"

# Field positions in one --input_files value after splitting on ourdelim:
# --input_files="$intab.input_files~~~$intab.input_CL~~~
# $intab.input_formats# ~~~$intab.input_label
# ~~~$intab.input_help"
IPATHPOS = 0
ICLPOS = 1
IFMTPOS = 2
ILABPOS = 3
IHELPOS = 4
IOCLPOS = 5  # extra slot appended by ScriptRunner.cleanuppar (copy of the CL field)

# Field positions in one --output_files value after splitting on ourdelim:
# --output_files "$otab.history_name~~~$otab.history_format~~~
# $otab.history_CL~~~$otab.history_test"
ONAMEPOS = 0
OFMTPOS = 1
OCLPOS = 2
OTESTPOS = 3
OOCLPOS = 4  # extra slot appended by ScriptRunner.cleanuppar (copy of the CL field)


# Field positions in one --additional_parameters value after splitting:
# --additional_parameters="$i.param_name~~~$i.param_value~~~
# $i.param_label~~~$i.param_help~~~$i.param_type
# ~~~$i.CL~~~$i.param_CLoverride"
ANAMEPOS = 0
AVALPOS = 1
ALABPOS = 2
AHELPPOS = 3
ATYPEPOS = 4
ACLPOS = 5
AOVERPOS = 6
AOCLPOS = 7  # extra slot appended by ScriptRunner.cleanuppar (copy of the CL field)


# Touch lxml once; apparently this only exists to stop flake8 reporting the
# import as unused.
foo = len(lxml.__version__)
# Placeholder executable handed to galaxyxml and stripped from the exported XML.
FAKEEXE = "~~~REMOVE~~~ME~~~"
# need this until a PR/version bump to fix galaxyxml prepending the exe even
# with override.
96 # need this until a PR/version bump to fix galaxyxml prepending the exe even | |
97 # with override. | |
98 | |
99 | |
def timenow():
    """Return the current local time formatted as ``dd/mm/YYYY HH:MM:SS``."""
    stamp_format = "%d/%m/%Y %H:%M:%S"
    now = time.localtime(time.time())
    return time.strftime(stamp_format, now)
103 | |
104 | |
def quote_non_numeric(s):
    """Wrap ``s`` in double quotes unless ``float(s)`` accepts it.

    Values that parse as a float are returned unchanged; anything that makes
    ``float`` raise ``ValueError`` comes back as ``"<s>"``.
    """
    try:
        float(s)
    except ValueError:
        return '"%s"' % s
    return s
114 | |
115 | |
# Single characters replaced by html_escape, with their HTML entity.
# ``#`` and ``$`` are included as numeric entities alongside the markup
# characters.
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "#": "&#35;",
    "$": "&#36;",
}
# Single characters replaced by cheetah_escape, with a backslash-escaped form.
cheetah_escape_table = {"$": "\\$", "#": "\\#"}


def html_escape(text):
    """Return ``text`` with each character in html_escape_table replaced by its entity."""
    return "".join(html_escape_table.get(c, c) for c in text)


def cheetah_escape(text):
    """Return ``text`` with every ``$`` and ``#`` prefixed by a backslash."""
    return "".join(cheetah_escape_table.get(c, c) for c in text)
134 | |
135 | |
def html_unescape(text):
    """Revert the entities produced by html_escape and the ``\\$`` escape.

    The targets are multi-character, so ``str.replace`` is used. ``&amp;`` is
    decoded last: decoding it first would turn an escaped entity such as
    ``&amp;lt;`` into ``&lt;`` and then wrongly into ``<``.
    """
    t = text.replace("&gt;", ">")
    t = t.replace("&lt;", "<")
    t = t.replace("\\$", "$")
    t = t.replace("&#36;", "$")
    t = t.replace("&#35;", "#")
    t = t.replace("&amp;", "&")
    return t
145 | |
146 | |
def parse_citations(citations_text):
    """Split a ``**ENTRY**`` delimited citation string into (kind, text) tuples.

    Each non-blank entry becomes ``("doi", rest)`` when it starts with ``doi``
    and ``("bibtex", rest)`` otherwise. Surrounding whitespace is ignored when
    looking for the prefix, and the ``bibtex`` prefix is only removed when it
    is actually present so unprefixed entries are not truncated.
    """
    citation_tuples = []
    for raw_entry in citations_text.split("**ENTRY**"):
        entry = raw_entry.strip()
        if not entry:
            continue
        if entry.startswith("doi"):
            citation_tuples.append(("doi", entry[len("doi"):].strip()))
            continue
        if entry.startswith("bibtex"):
            entry = entry[len("bibtex"):]
        citation_tuples.append(("bibtex", entry.strip()))
    return citation_tuples
157 | |
158 | |
class Error(Exception):
    """Root exception type for errors raised by this module."""
163 | |
164 | |
165 class ScriptRunner: | |
166 """Wrapper for an arbitrary script | |
167 uses galaxyxml | |
168 | |
169 """ | |
170 | |
def __init__(self, args=None):
    """
    Prepare the command line ``cl`` for running the tool here and the
    elements needed for galaxyxml tool generation.

    ``args`` is the parsed argument namespace; this method reads
    input_files, output_files, additional_parameters, parampass, sysexe,
    packages, tool_name, tool_version, tool_desc, script_path,
    command_override, test_override and cl_prefix from it.

    Side effects: creates ./tfout, ./tfout/test-data and
    ./TF_run_report_tempdir when missing, and writes the script artifacts
    via prepScript when a script is supplied.
    """
    self.ourcwd = os.getcwd()
    self.ourenv = copy.deepcopy(os.environ)
    self.infiles = [x.split(ourdelim) for x in args.input_files]
    self.outfiles = [x.split(ourdelim) for x in args.output_files]
    self.addpar = [x.split(ourdelim) for x in args.additional_parameters]
    self.args = args
    self.cleanuppar()
    self.lastclredirect = None
    self.lastxclredirect = None
    self.cl = []
    self.xmlcl = []
    self.is_positional = self.args.parampass == "positional"
    # The executable is the system exe if given, else the first package name.
    if self.args.sysexe:
        self.executeme = self.args.sysexe
    elif self.args.packages:
        self.executeme = self.args.packages.split(",")[0].split(":")[0].strip()
    else:
        self.executeme = None
    aCL = self.cl.append
    aXCL = self.xmlcl.append
    assert args.parampass in [
        "0",
        "argparse",
        "positional",
    ], 'args.parampass must be "0","positional" or "argparse"'
    self.tool_name = re.sub("[^a-zA-Z0-9_]+", "", args.tool_name)
    self.tool_id = self.tool_name
    self.newtool = gxt.Tool(
        self.tool_name,
        self.tool_id,
        self.args.tool_version,
        self.args.tool_desc,
        FAKEEXE,
    )
    self.newtarpath = "toolfactory_%s.tgz" % self.tool_name
    self.tooloutdir = "./tfout"
    self.repdir = "./TF_run_report_tempdir"
    self.testdir = os.path.join(self.tooloutdir, "test-data")
    for needed_dir in (self.tooloutdir, self.testdir, self.repdir):
        os.makedirs(needed_dir, exist_ok=True)
    self.tinputs = gxtp.Inputs()
    self.toutputs = gxtp.Outputs()
    self.testparam = []
    if self.args.script_path:
        self.prepScript()
    # Optional override files are read as right-stripped lines; the handles
    # are closed as soon as the lines are in memory.
    if self.args.command_override:
        with open(self.args.command_override, "r") as override:
            self.command_override = [x.rstrip() for x in override]
    else:
        self.command_override = None
    if self.args.test_override:
        with open(self.args.test_override, "r") as override:
            self.test_override = [x.rstrip() for x in override]
    else:
        self.test_override = None
    if self.args.cl_prefix:  # DIY CL start
        for c in self.args.cl_prefix.split(" "):
            aCL(c)
            aXCL(c)
    elif self.args.script_path:
        aCL(self.executeme)
        aCL(self.sfile)
        aXCL(self.executeme)
        aXCL("$runme")
    else:
        aCL(self.executeme)
        aXCL(self.executeme)
    self.elog = os.path.join(self.repdir, "%s_error_log.txt" % self.tool_name)
    self.tlog = os.path.join(self.repdir, "%s_runner_log.txt" % self.tool_name)

    if self.args.parampass == "0":
        self.clsimple()
        return

    # Each suffix entry is [sort key, name, value, override]; the lists are
    # sorted so parameters end up ordered by their original CL field.
    clsuffix = []
    xclsuffix = []
    for p in self.infiles:
        if p[IOCLPOS].upper() == "STDIN":
            appendme = [
                p[ICLPOS],
                p[ICLPOS],
                p[IPATHPOS],
                "< %s" % p[IPATHPOS],
            ]
            xappendme = [
                p[ICLPOS],
                p[ICLPOS],
                p[IPATHPOS],
                "< $%s" % p[ICLPOS],
            ]
        else:
            appendme = [p[IOCLPOS], p[ICLPOS], p[IPATHPOS], ""]
            xappendme = [p[IOCLPOS], p[ICLPOS], "$%s" % p[ICLPOS], ""]
        clsuffix.append(appendme)
        xclsuffix.append(xappendme)
    for p in self.outfiles:
        if p[OOCLPOS] == "STDOUT":
            self.lastclredirect = [">", p[ONAMEPOS]]
            self.lastxclredirect = [">", "$%s" % p[OCLPOS]]
        else:
            clsuffix.append([p[OCLPOS], p[ONAMEPOS], p[ONAMEPOS], ""])
            xclsuffix.append([p[OCLPOS], p[ONAMEPOS], "$%s" % p[ONAMEPOS], ""])
    for p in self.addpar:
        clsuffix.append([p[AOCLPOS], p[ACLPOS], p[AVALPOS], p[AOVERPOS]])
        xclsuffix.append(
            [p[AOCLPOS], p[ACLPOS], '"$%s"' % p[ANAMEPOS], p[AOVERPOS]]
        )
    clsuffix.sort()
    xclsuffix.sort()
    self.xclsuffix = xclsuffix
    self.clsuffix = clsuffix
    if self.args.parampass == "positional":
        self.clpositional()
    else:
        self.clargparse()
297 | |
def prepScript(self):
    """Load the user script and write the copies needed to run and wrap it.

    Reads ``self.args.script_path`` and sets ``self.script`` (joined text),
    ``self.sfile`` (a temp copy to execute), ``self.escapedScript``
    (cheetah-escaped lines for the config file) and ``self.spacedScript``
    (non-blank lines with a leading space for the help text). Also writes
    the escaped script to ``<tool_name>.<executeme>`` in the current
    directory.

    Raises AssertionError when the script has no non-blank line.
    """
    with open(self.args.script_path, "r") as source:
        rx = [x.rstrip() for x in source]
    assert any(x.strip() for x in rx), "Supplied script is empty. Cannot run"
    self.script = "\n".join(rx)
    fhandle, self.sfile = tempfile.mkstemp(
        prefix=self.tool_name, suffix="_%s" % (self.executeme)
    )
    # Wrap the descriptor returned by mkstemp so it is closed with the file
    # instead of leaking for the life of the process.
    with os.fdopen(fhandle, "w") as tscript:
        tscript.write(self.script)
    self.escapedScript = [cheetah_escape(x) for x in rx]
    # NOTE(review): the indent literal is reproduced as it appears in this
    # copy of the source; confirm its width against the repository.
    self.spacedScript = [f" {x}" for x in rx if x.strip()]
    art = "%s.%s" % (self.tool_name, self.executeme)
    with open(art, "wb") as artifact:
        artifact.write(bytes("\n".join(self.escapedScript), "utf8"))
316 | |
def cleanuppar(self):
    """Validate positional ordinals and normalise the parameter name fields.

    Every input, output and additional parameter row gets a copy of its
    original CL field appended; numeric or stdio CL fields are then replaced
    by a usable name (``inputN``, the output name, or ``paramN``).
    """
    ordinal_msg = "Positional parameters must be ordinal integers - got %s for %s"
    if self.args.parampass == "positional":
        for row in self.infiles:
            assert (
                row[ICLPOS].isdigit() or row[ICLPOS].strip().upper() == "STDIN"
            ), ordinal_msg % (row[ICLPOS], row[ILABPOS])
        for row in self.outfiles:
            assert (
                row[OCLPOS].isdigit() or row[OCLPOS].strip().upper() == "STDOUT"
            ), ordinal_msg % (row[OCLPOS], row[ONAMEPOS])
        for row in self.addpar:
            assert row[ACLPOS].isdigit(), ordinal_msg % (row[ACLPOS], row[ANAMEPOS])

    for idx, row in enumerate(self.infiles):
        renamed = copy.copy(row)
        cl_field = renamed[ICLPOS]
        renamed.append(cl_field)
        # NOTE(review): inputs are compared with "STDOUT" here although the
        # validation above accepts "STDIN" - confirm which is intended.
        needs_name = (
            cl_field.isdigit()
            or self.args.parampass == "0"
            or cl_field.strip().upper() == "STDOUT"
        )
        if needs_name:
            renamed[ICLPOS] = "input%d" % (idx + 1)
        self.infiles[idx] = renamed

    not_positional = self.args.parampass != "positional"
    for row in self.outfiles:
        row.append(row[OCLPOS])  # keep copy
        kept = row[OOCLPOS]
        if (kept.isdigit() and not_positional) or kept.strip().upper() == "STDOUT":
            row[OCLPOS] = row[ONAMEPOS]

    for row in self.addpar:
        row.append(row[ACLPOS])
        if row[ACLPOS].isdigit():
            row[ACLPOS] = "param%s" % row[ACLPOS]
367 | |
def clsimple(self):
    """No parameters - append ``<`` / ``>`` redirections for the first input and output."""
    if self.infiles:
        first_in = self.infiles[0]
        self.cl.extend(["<", first_in[IPATHPOS]])
        self.xmlcl.extend(["<", "$%s" % first_in[ICLPOS]])
    if self.outfiles:
        first_out = self.outfiles[0]
        self.cl.extend([">", first_out[OCLPOS]])
        self.xmlcl.extend([">", "$%s" % first_out[ONAMEPOS]])
383 | |
def clpositional(self):
    """Append the sorted suffix values positionally: inputs in order, then params.

    The XML command line additionally gets the stdout redirect when one was
    recorded in ``self.lastxclredirect``.
    """
    for _key, _name, value, _override in self.clsuffix:
        self.cl.append("%s" % value if " " in value else value)
    for _key, _name, value, _override in self.xclsuffix:
        self.xmlcl.append(value)
    if self.lastxclredirect:
        redirect_op, redirect_target = self.lastxclredirect[0], self.lastxclredirect[1]
        self.xmlcl.append(redirect_op)
        self.xmlcl.append(redirect_target)
398 | |
def clargparse(self):
    """argparse style - emit ``flag value`` pairs, inputs then params in named form."""

    def as_flag(name, override):
        # An explicit override wins; otherwise one dash for single-character
        # names and two dashes for longer ones.
        if override > "":
            return override
        if len(name.strip()) == 1:
            return "-%s" % name
        return "--%s" % name

    for _key, name, value, override in self.xclsuffix:
        self.xmlcl.append(as_flag(name, override))
        self.xmlcl.append(value)
    for _key, name, value, override in self.clsuffix:
        self.cl.append(as_flag(name, override))
        self.cl.append(value)
423 | |
def getNdash(self, newname):
    """Return the dash count for ``newname``: 0 when positional, 1 for names shorter than two characters, else 2."""
    if self.is_positional:
        return 0
    return 1 if len(newname) < 2 else 2
432 | |
def doXMLparam(self):
    """Build galaxyxml outputs, inputs, parameters and their test elements.

    Appends to ``self.toutputs``, ``self.tinputs`` and ``self.testparam``
    from ``self.outfiles``, ``self.infiles`` and ``self.addpar``.

    Raises ValueError for an additional parameter whose type is not text,
    integer or float.
    """
    # --output_files "$otab.history_name~~~$otab.history_format~~~$otab.history_CL~~~$otab.history_test"
    for p in self.outfiles:
        newname, newfmt, newcl, test, oldcl = p
        test = test.strip()
        ndash = self.getNdash(newcl)
        aparm = gxtp.OutputData(
            name=newname, format=newfmt, num_dashes=ndash, label=newcl
        )
        aparm.positional = self.is_positional
        if self.is_positional:
            if oldcl.upper() == "STDOUT":
                # Large ordinal so the stdout redirect sorts after everything else.
                aparm.positional = 9999999
                aparm.command_line_override = "> $%s" % newname
            else:
                aparm.positional = int(oldcl)
                aparm.command_line_override = "$%s" % newname
        self.toutputs.append(aparm)
        if not test:
            continue
        # The test spec looks like "diff:N" or "sim_size:N"; the part after
        # the colon is optional.
        test_value = test.partition(":")[2].strip()
        tp = None
        if test.startswith("diff"):
            ld = int(test_value) if test_value.isdigit() else 0
            tp = gxtp.TestOutput(
                name=newname,
                value="%s_sample" % newname,
                format=newfmt,
                compare="diff",
                lines_diff=ld,
            )
        elif test.startswith("sim_size"):
            delta = None
            delta_frac = None
            if test_value:
                if "." in test_value:
                    delta_frac = min(1.0, float(test_value))
                else:
                    delta = int(test_value)
            tp = gxtp.TestOutput(
                name=newname,
                value="%s_sample" % newname,
                format=newfmt,
                compare="sim_size",
                delta=delta,
                delta_frac=delta_frac,
            )
        # Only record a test element that was built for this output, so an
        # unrecognised spec cannot re-append one left over from an earlier row.
        if tp is not None:
            self.testparam.append(tp)
    for p in self.infiles:
        newname = p[ICLPOS]
        newfmt = p[IFMTPOS]
        ndash = self.getNdash(newname)
        alab = p[ILABPOS] if len(p[ILABPOS]) > 0 else p[ICLPOS]
        aninput = gxtp.DataParam(
            newname,
            optional=False,
            label=alab,
            help=p[IHELPOS],
            format=newfmt,
            multiple=False,
            num_dashes=ndash,
        )
        aninput.positional = self.is_positional
        self.tinputs.append(aninput)
        tparm = gxtp.TestParam(name=newname, value="%s_sample" % newname)
        self.testparam.append(tparm)
    param_classes = {
        "text": gxtp.TextParam,
        "integer": gxtp.IntegerParam,
        "float": gxtp.FloatParam,
    }
    for p in self.addpar:
        (
            newname,
            newval,
            newlabel,
            newhelp,
            newtype,
            newcl,
            override,
            oldcl,
        ) = p
        if not len(newlabel) > 0:
            newlabel = newname
        ndash = self.getNdash(newname)
        if newtype not in param_classes:
            raise ValueError(
                'Unrecognised parameter type "%s" for additional parameter %s in makeXML'
                % (newtype, newname)
            )
        # The supplied label (falling back to the name) is used for every
        # type, not just text parameters.
        aparm = param_classes[newtype](
            newname,
            label=newlabel,
            help=newhelp,
            value=newval,
            num_dashes=ndash,
        )
        aparm.positional = self.is_positional
        if self.is_positional:
            aparm.positional = int(oldcl)
        self.tinputs.append(aparm)
        tparm = gxtp.TestParam(newname, value=newval)
        self.testparam.append(tparm)
559 | |
def doNoXMLparam(self):
    """Filter style package - one input read from stdin, one output written to stdout."""
    if self.infiles:
        only_in = self.infiles[0]
        alab = only_in[ILABPOS]
        if not alab:
            alab = only_in[ICLPOS]
        max1s = (
            "Maximum one input if parampass is 0 but multiple input files supplied - %s"
            % str(self.infiles)
        )
        assert len(self.infiles) == 1, max1s
        in_name = only_in[ICLPOS]
        aninput = gxtp.DataParam(
            in_name,
            optional=False,
            label=alab,
            help=only_in[IHELPOS],
            format=only_in[IFMTPOS],
            multiple=False,
            num_dashes=0,
        )
        aninput.command_line_override = "< $%s" % in_name
        aninput.positional = self.is_positional
        self.tinputs.append(aninput)
        self.testparam.append(
            gxtp.TestParam(name=in_name, value="%s_sample" % in_name)
        )
    if self.outfiles:
        only_out = self.outfiles[0]
        out_name = only_out[OCLPOS]
        out_fmt = only_out[OFMTPOS]
        anout = gxtp.OutputData(out_name, format=out_fmt, num_dashes=0)
        anout.command_line_override = "> $%s" % out_name
        anout.positional = self.is_positional
        self.toutputs.append(anout)
        self.testparam.append(
            gxtp.TestOutput(
                name=out_name, value="%s_sample" % out_name, format=out_fmt
            )
        )
597 | |
def makeXML(self):
    """
    Create a Galaxy xml tool wrapper for the new script using galaxyxml
    and write it to ``<tool_name>.xml`` in the current directory.

    Fills in command, help, requirements, inputs/outputs, the optional
    ``runme`` config file, tests and provenance comments on
    ``self.newtool`` before exporting.
    """
    if self.command_override:
        self.newtool.command_override = self.command_override  # config file
    else:
        self.newtool.command_override = self.xmlcl
    if self.args.help_text:
        with open(self.args.help_text, "r") as helpfile:
            helptext = helpfile.readlines()
        safertext = "\n".join([cheetah_escape(x) for x in helptext])
        if self.args.script_path:
            scr = [x for x in self.spacedScript if x.strip()]
            scr.insert(0, "\n------\n\n\nScript::\n")
            if len(scr) > 300:
                # Keep only the head and tail of very long scripts in the help.
                scr = (
                    scr[:100]
                    + [" >300 lines - stuff deleted", " ......"]
                    + scr[-100:]
                )
            scr.append("\n")
            safertext = safertext + "\n".join(scr)
        self.newtool.help = safertext
    else:
        self.newtool.help = (
            "Please ask the tool author (%s) for help "
            "as none was supplied at tool generation\n" % (self.args.user_email)
        )
    self.newtool.version_command = None  # do not want
    requirements = gxtp.Requirements()
    if self.args.packages:
        for d in self.args.packages.split(","):
            # Accept name==ver, name=ver and name:ver; partition tolerates a
            # version that itself contains a colon.
            d = d.replace("==", ":").replace("=", ":")
            packg, _sep, ver = d.partition(":")
            requirements.append(
                gxtp.Requirement("package", packg.strip(), ver.strip())
            )
    self.newtool.requirements = requirements
    if self.args.parampass == "0":
        self.doNoXMLparam()
    else:
        self.doXMLparam()
    self.newtool.outputs = self.toutputs
    self.newtool.inputs = self.tinputs
    if self.args.script_path:
        configfiles = gxtp.Configfiles()
        configfiles.append(
            gxtp.Configfile(name="runme", text="\n".join(self.escapedScript))
        )
        self.newtool.configfiles = configfiles
    tests = gxtp.Tests()
    test_a = gxtp.Test()
    for tp in self.testparam:
        test_a.append(tp)
    tests.append(test_a)
    self.newtool.tests = tests
    self.newtool.add_comment(
        "Created by %s at %s using the Galaxy Tool Factory."
        % (self.args.user_email, timenow())
    )
    self.newtool.add_comment("Source in git at: %s" % (toolFactoryURL))
    self.newtool.add_comment(
        "Cite: Creating re-usable tools from scripts doi:10.1093/bioinformatics/bts573"
    )
    exml0 = self.newtool.export()
    exml = exml0.replace(FAKEEXE, "")  # temporary work around until PR accepted
    if self.test_override:
        # cannot do this inside galaxyxml as it expects lxml objects for tests
        part1 = exml.split("<tests>")[0]
        part2 = exml.split("</tests>")[1]
        # test_override is a list of lines: join it, otherwise the Python
        # list repr would be written into the XML.
        exml = "%s\n%s\n%s" % (part1, "\n".join(self.test_override), part2)
    # exml = exml.replace('range="1:"', 'range="1000:"')
    with open("%s.xml" % self.tool_name, "w") as xf:
        xf.write(exml)
        xf.write("\n")
    # ready for the tarball
684 # ready for the tarball | |
685 | |
def run(self):
    """
    Generate test outputs by running the prepared command line ``self.cl``.

    Won't work if command or test override in play - planemo is the
    easiest way to generate test outputs for that case so is
    automagically selected.

    For parampass other than "0", stderr is appended to the error log and
    stdout goes to the redirect target if one was recorded, else to the
    runner log. For parampass "0", the first input file is wired to stdin
    and the first output file to stdout. Empty log files are removed.

    Returns the return code of the subprocess.
    """
    scl = " ".join(self.cl)
    note = "## Executing Toolfactory generated command line = %s\n" % scl
    if self.args.parampass != "0":
        # Append mode creates the file when it is missing.
        with open(self.elog, "a") as ste:
            if self.lastclredirect:
                sto = open(self.lastclredirect[1], "wb")  # is name of an output file
            else:
                sto = open(self.tlog, "a")
                sto.write(note)
                # Flush before the child writes through the same descriptor.
                sto.flush()
            with sto:
                subp = subprocess.run(
                    self.cl, env=self.ourenv, shell=False, stdout=sto, stderr=ste
                )
        retval = subp.returncode
    else:  # work around special case - stdin and write to stdout
        # NOTE(review): self.cl still holds the "<" / ">" tokens added by
        # clsimple and is run with shell=False - confirm that is intended.
        own_stdin = len(self.infiles) > 0
        own_stdout = len(self.outfiles) > 0
        sti = open(self.infiles[0][IPATHPOS], "rb") if own_stdin else sys.stdin
        try:
            sto = open(self.outfiles[0][ONAMEPOS], "wb") if own_stdout else sys.stdout
            try:
                subp = subprocess.run(
                    self.cl, env=self.ourenv, shell=False, stdout=sto, stdin=sti
                )
            finally:
                # Only close handles opened here, never the process streams.
                if own_stdout:
                    sto.close()
        finally:
            if own_stdin:
                sti.close()
        if own_stdout:
            # The output file is binary tool output, so the note goes to the
            # runner log instead of being written into it.
            with open(self.tlog, "a") as tlog:
                tlog.write(note)
        else:
            sys.stdout.write(note)
        retval = subp.returncode
    for logpath in (self.tlog, self.elog):
        if os.path.isfile(logpath) and os.stat(logpath).st_size == 0:
            os.unlink(logpath)
    logging.debug("run done")
    return retval
741 | |
def copy_to_container(self, src, dest, container):
    """Recreate the src directory tree at dest inside ``container``.

    ``src`` is archived under its own base name and the tar bytes are handed
    to ``container.put_archive(dest, data)``. The archive is built in memory
    with an explicit arcname, so the working directory is never changed and
    no temporary file or descriptor is left behind.
    """
    import io

    arcname = os.path.basename(os.path.normpath(src))
    buffer = io.BytesIO()
    with tarfile.open(fileobj=buffer, mode="w") as tar:
        tar.add(src, arcname=arcname)
    container.put_archive(dest, buffer.getvalue())
756 | |
def copy_from_container(self, src, dest, container):
    """Recreate the src directory tree at dest using the docker sdk.

    ``container.get_archive(src)`` yields the tar stream in chunks; they are
    spooled to an anonymous temporary file, which is removed automatically
    even if extraction fails, and then unpacked into ``dest`` (created when
    missing).
    """
    os.makedirs(dest, exist_ok=True)
    bits, _stat = container.get_archive(src)
    with tempfile.TemporaryFile(suffix=".tar") as spool:
        for chunk in bits:
            spool.write(chunk)
        spool.seek(0)
        with tarfile.open(fileobj=spool, mode="r") as tar:
            tar.extractall(dest)
770 | |
def planemo_biodocker_test(self):
    """Run planemo test on the generated tool inside a planemo biocontainer.

    planemo currently leaks dependencies if used in the same container and gets unhappy after a
    first successful run. https://github.com/galaxyproject/planemo/issues/1078#issuecomment-731476930

    Docker biocontainer has planemo with caches filled to save repeated downloads

    The tool directory is copied into a fresh volume, planemo is run twice
    (first to update test data, then to write the html report), and the
    results are copied back into the current directory. The container,
    volume and scratch directory are removed even when a step fails.
    """

    def prun(container, tout, cl, user="biodocker"):
        # Run one command in the container and log its result.
        rlog = container.exec_run(cl, user=user)
        slogl = str(rlog).split("\\n")
        slog = "\n".join(slogl)
        tout.write(f"## got rlog {slog} from {cl}\n")

    planemoimage = "quay.io/fubar2/planemo-biocontainer"
    xreal = "%s.xml" % self.tool_name
    repname = f"{self.tool_name}_planemo_test_report.html"
    ptestrep_path = os.path.join(self.repdir, repname)
    destdir = "/toolfactory/ptest"
    imrep = os.path.join(destdir, repname)
    ptestpath = os.path.join(destdir, "tfout", xreal)
    # The module's own Error is kept for compatibility; docker SDK failures
    # derive from docker.errors.DockerException.
    recoverable = (Error, docker.errors.DockerException)
    client = docker.from_env()
    tvol = client.volumes.create()
    tvolname = tvol.name
    container = None
    testouts = None
    with open(self.tlog, "a") as tout:
        try:
            # need to keep the container running so keep it open with sleep
            # will stop and destroy it when we are done
            container = client.containers.run(
                planemoimage,
                "sleep 120m",
                detach=True,
                user="biodocker",
                volumes={f"{tvolname}": {"bind": "/toolfactory", "mode": "rw"}},
            )
            cl = f"mkdir -p {destdir}"
            prun(container, tout, cl, user="root")
            # that's how hard it is to get root on a biodocker container :(
            cl = f"rm -rf {destdir}/*"
            prun(container, tout, cl, user="root")
            self.copy_to_container(self.tooloutdir, destdir, container)
            cl = "chown -R biodocker /toolfactory"
            prun(container, tout, cl, user="root")
            _ = container.exec_run(f"ls -la {destdir}")
            ptestcl = f"planemo test --update_test_data --no_cleanup --test_data {destdir}/tfout/test-data --galaxy_root /home/biodocker/galaxy-central {ptestpath}"
            try:
                _ = container.exec_run(ptestcl)
                # fails because test outputs missing but updates the test-data directory
            except recoverable as e:
                tout.write(f"#### error: {e} from {ptestcl}\n")
            cl = f"planemo test --test_output {imrep} --no_cleanup --test_data {destdir}/tfout/test-data --galaxy_root /home/biodocker/galaxy-central {ptestpath}"
            try:
                prun(container, tout, cl)
            except recoverable as e:
                # Report the command that was actually run in this step.
                tout.write(f"#### error: {e} from {cl}\n")
            testouts = tempfile.mkdtemp(suffix=None, prefix="tftemp", dir=".")
            self.copy_from_container(destdir, testouts, container)
            src = os.path.join(testouts, "ptest")
            if os.path.isdir(src):
                shutil.copytree(src, ".", dirs_exist_ok=True)
                src = repname
                if os.path.isfile(repname):
                    shutil.copyfile(src, ptestrep_path)
            else:
                tout.write(f"No output from run to shutil.copytree in {src}\n")
        finally:
            if container is not None:
                container.stop()
                container.remove()
            tvol.remove()
            if testouts is not None:
                shutil.rmtree(testouts)
846 | |
def shedLoad(self):
    """
    Use bioblend to create a new toolshed repository for this tool, or find
    the existing one, then upload the tool tarball to it.

    Progress and failures are appended to the runner log.
    """
    with open(self.tlog, "a") as sto:
        ts = toolshed.ToolShedInstance(
            url=self.args.toolshed_url, key=self.args.toolshed_api_key, verify=False
        )
        repos = ts.repositories.get_repositories()
        rnames = [x.get("name", "?") for x in repos]
        rids = [x.get("id", "?") for x in repos]
        tfcat = "ToolFactory generated tools"
        if self.tool_name not in rnames:
            tscat = ts.categories.get_categories()
            cnames = [x.get("name", "?").strip() for x in tscat]
            cids = [x.get("id", "?") for x in tscat]
            catID = None
            if tfcat.strip() in cnames:
                ci = cnames.index(tfcat)
                catID = cids[ci]
            res = ts.repositories.create_repository(
                name=self.args.tool_name,
                synopsis="Synopsis:%s" % self.args.tool_desc,
                description=self.args.tool_desc,
                type="unrestricted",
                remote_repository_url=self.args.toolshed_url,
                homepage_url=None,
                category_ids=catID,
            )
            tid = res.get("id", None)
            sto.write(
                f"#create_repository {self.args.tool_name} tid={tid} res={res}\n"
            )
        else:
            i = rnames.index(self.tool_name)
            tid = rids[i]
        try:
            res = ts.repositories.update_repository(
                id=tid, tar_ball_path=self.newtarpath, commit_message=None
            )
            # Log the repository id, not the ``id`` builtin.
            sto.write(f"#update res id {tid} ={res}\n")
        except ConnectionError:
            sto.write(
                "####### Is the toolshed running and the API key correct? Bioblend shed upload failed\n"
            )
897 | |
def eph_galaxy_load(self):
    """
    Use ephemeris (``shed-tools install``) to load the new tool from the
    local toolshed after planemo uploads it.

    The command and its output are appended to the runner log. Returns the
    return code of ``shed-tools``.
    """
    cll = [
        "shed-tools",
        "install",
        "-g",
        self.args.galaxy_url,
        "--latest",
        "-a",
        self.args.galaxy_api_key,
        "--name",
        self.tool_name,
        "--owner",
        "fubar",
        "--toolshed",
        self.args.toolshed_url,
        "--section_label",
        "ToolFactory",
    ]
    with open(self.tlog, "a") as tout:
        tout.write("running\n%s\n" % " ".join(cll))
        # Flush so this line lands before anything the child writes through
        # the shared descriptor.
        tout.flush()
        subp = subprocess.run(
            cll, env=self.ourenv, cwd=self.ourcwd, shell=False, stderr=tout, stdout=tout
        )
        tout.write(
            "installed %s - got retcode %d\n" % (self.tool_name, subp.returncode)
        )
    return subp.returncode
932 | |
def writeShedyml(self):
    """Write the ``.shed.yml`` repository description for planemo into the tool output directory.

    The owner is the part of ``self.args.user_email`` before the ``@``.
    """
    yuser = self.args.user_email.split("@")[0]
    yfname = os.path.join(self.tooloutdir, ".shed.yml")
    odict = {
        "name": self.tool_name,
        "owner": yuser,
        "type": "unrestricted",
        "description": self.args.tool_desc,
        "synopsis": self.args.tool_desc,
        "category": "TF Generated Tools",
    }
    # The context manager closes the file even if yaml.dump raises.
    with open(yfname, "w") as yamlf:
        yaml.dump(odict, yamlf, allow_unicode=True)
948 | |
def makeTool(self):
    """Write the tool xml and the input samples into the tool output directory."""
    self.makeXML()
    if self.args.script_path:
        # NOTE(review): if self.sfile is an absolute path this join returns
        # self.sfile itself, so the copy below would never run - confirm.
        script_copy = os.path.join(self.tooloutdir, "%s" % (self.sfile))
        if not os.path.exists(script_copy):
            shutil.copyfile(self.sfile, script_copy)
    xml_name = "%s.xml" % self.tool_name
    shutil.copyfile(xml_name, os.path.join(self.tooloutdir, xml_name))
    for infile in self.infiles:
        sample_path = os.path.join(self.testdir, "%s_sample" % infile[ICLPOS])
        shutil.copyfile(infile[IPATHPOS], sample_path)
963 | |
def makeToolTar(self):
    """Move outputs into test-data and prepare the tarball.

    For every declared output that has no ``<name>_sample`` file in
    ``self.testdir`` yet, the output ``<name>`` found there is copied to
    ``<name>_sample`` and to ``<name>.sample`` in ``self.repdir``. A missing
    output is noted in the log ``self.tlog``. ``self.tooloutdir`` is then
    archived as ``self.tool_name`` into the gzipped tarball
    ``self.newtarpath``, leaving out planemo test reports, and the tarball
    is copied to ``self.args.new_tool``.
    """
    excludeme = "_planemo_test_report.html"

    def exclude_function(tarinfo):
        """Drop planemo test reports from the archive; keep everything else."""
        return None if tarinfo.name.endswith(excludeme) else tarinfo

    # Mode "a" creates the log if needed; the context manager guarantees the
    # handle is closed, which the previous version never did.
    with open(self.tlog, "a") as tout:
        for p in self.outfiles:
            oname = p[ONAMEPOS]
            tdest = os.path.join(self.testdir, "%s_sample" % oname)
            if os.path.isfile(tdest):
                continue  # the sample already exists, leave it alone
            src = os.path.join(self.testdir, oname)
            if os.path.isfile(src):
                shutil.copyfile(src, tdest)
                dest = os.path.join(self.repdir, "%s.sample" % (oname))
                shutil.copyfile(src, dest)
            else:
                # Trailing newline added so log records do not run together.
                tout.write(
                    "###Output file %s not found in testdir %s. This is normal during the first Planemo run that generates test outputs\n"
                    % (tdest, self.testdir)
                )
    with tarfile.open(self.newtarpath, "w:gz") as tf:
        tf.add(name=self.tooloutdir, arcname=self.tool_name, filter=exclude_function)
    shutil.copyfile(self.newtarpath, self.args.new_tool)
994 | |
def moveRunOutputs(self):
    """Copy planemo or run outputs into the toolfactory collection directory.

    Regular files directly inside ``self.tooloutdir`` are copied to
    ``self.repdir`` with every "." in the name replaced by "_" and an
    extension appended: the file's own extension, that extension plus
    ".txt" for yml/xml/json/yaml files, or ".txt" when the name has no dot.

    Regular files in ``self.testdir`` are copied under their own name with
    their extension appended once more (".txt" when there is none); sample
    files and planemo test reports are skipped.
    """
    textlike = (".yml", ".xml", ".json", ".yaml")
    with os.scandir(self.tooloutdir) as tool_entries:
        for item in tool_entries:
            if not item.is_file():
                continue
            fname = item.name
            suffix = ".txt"
            if "." in fname:
                suffix = os.path.splitext(fname)[1]
                if suffix in textlike:
                    suffix = f"{suffix}.txt"
            target = os.path.join(self.repdir, fname.replace(".", "_") + suffix)
            shutil.copyfile(os.path.join(self.tooloutdir, fname), target)
    skipped_endings = ("_sample", "_planemo_test_report.html")
    with os.scandir(self.testdir) as test_entries:
        for item in test_entries:
            fname = item.name
            if not item.is_file() or fname.endswith(skipped_endings):
                continue
            suffix = os.path.splitext(fname)[1] if "." in fname else ".txt"
            target = os.path.join(self.repdir, fname + suffix)
            shutil.copyfile(os.path.join(self.testdir, fname), target)
1027 | |
1028 | |
def main():
    """
    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml

    Parses the command line, refuses to run for an unauthorised user or when
    no tool name or no executable/package was given, then builds the tool
    with ScriptRunner. With --make_Tool=generate the script is simply run;
    any other value tests it with planemo in a biodocker container. Outputs
    are collected and the tool tarball is written in both cases. With
    --make_Tool=gentestinstall the tool is also uploaded to the toolshed and
    installed into Galaxy.

    The validation failures exit the process with the message on stderr.
    Explicit checks are used instead of ``assert`` so they are not stripped
    when Python runs with -O.
    """
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a("--script_path", default=None)
    a("--history_test", default=None)
    a("--cl_prefix", default=None)
    a("--sysexe", default=None)
    a("--packages", default=None)
    a("--tool_name", default="newtool")
    a("--tool_dir", default=None)
    a("--input_files", default=[], action="append")
    a("--output_files", default=[], action="append")
    a("--user_email", default="Unknown")
    a("--bad_user", default=None)
    a("--make_Tool", default="runonly")
    a("--help_text", default=None)
    a("--tool_desc", default=None)
    a("--tool_version", default=None)
    a("--citations", default=None)
    a("--command_override", default=None)
    a("--test_override", default=None)
    a("--additional_parameters", action="append", default=[])
    a("--edit_additional_parameters", action="store_true", default=False)
    a("--parampass", default="positional")
    a("--tfout", default="./tfout")
    a("--new_tool", default="new_tool")
    a("--galaxy_url", default="http://localhost:8080")
    a("--toolshed_url", default="http://localhost:9009")
    # make sure this is identical to tool_sheds_conf.xml
    # localhost != 127.0.0.1 so validation fails
    a("--toolshed_api_key", default="fakekey")
    a("--galaxy_api_key", default="fakekey")
    a("--galaxy_root", default="/galaxy-central")
    a("--galaxy_venv", default="/galaxy_venv")
    args = parser.parse_args()
    if args.bad_user:
        sys.exit(
            'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to "admin_users" in the galaxy.yml Galaxy configuration file'
            % (args.bad_user, args.bad_user)
        )
    if not args.tool_name:
        sys.exit("## Tool Factory expects a tool name - eg --tool_name=DESeq")
    if not (args.sysexe or args.packages):
        sys.exit(
            "## Tool Factory wrapper expects an interpreter or an executable package"
        )
    # remove quotes we need to deal with spaces in CL params
    args.input_files = [x.replace('"', "").replace("'", "") for x in args.input_files]
    args.additional_parameters = [
        x.replace('"', "") for x in args.additional_parameters
    ]
    r = ScriptRunner(args)
    r.writeShedyml()
    r.makeTool()
    if args.make_Tool == "generate":
        _ = r.run()  # for testing toolfactory itself
    else:
        r.planemo_biodocker_test()  # test to make outputs and then test
    # Both modes collect the outputs and build the tarball the same way.
    r.moveRunOutputs()
    r.makeToolTar()
    if args.make_Tool == "gentestinstall":
        r.shedLoad()
        r.eph_galaxy_load()
1093 | |
1094 | |
# Run the ToolFactory command-line entry point only when this file is
# executed as a script, not when it is imported.
if __name__ == "__main__":
    main()