comparison tacrev/test-data/Input_text_file_to_be_reversed_sample @ 1:0183cad9d13b draft

planemo upload
author fubar
date Thu, 22 Feb 2024 10:48:01 +0000
parents
children
comparison
equal deleted inserted replaced
0:2beaae16651e 1:0183cad9d13b
1 # see https://github.com/fubar2/toolfactory
2 #
3 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
4 #
5 # all rights reserved
6 # Licensed under the LGPL
7 # suggestions for improvement and bug fixes welcome at
8 # https://github.com/fubar2/toolfactory
9 #
10 # march 2022: Refactored into two tools - generate and test/install
11 # as part of GTN tutorial development and biocontainer adoption
12 # The tester runs planemo on a non-tested archive, creates the test outputs
13 # and returns a new proper tool with test.
14
15
16
17 import argparse
18 import copy
19 import fcntl
20 import json
21 import os
22 import re
23 import shlex
24 import shutil
25 import subprocess
26 import sys
27 import tarfile
28 import tempfile
29 import time
30
31 from bioblend import galaxy
32
33 import galaxyxml.tool as gxt
34 import galaxyxml.tool.parameters as gxtp
35
36 import lxml.etree as ET
37
38 import yaml
39
# Version banner of this ToolFactory script.
myversion = "V2.4 March 2022"

# Module-wide reporting switches.
verbose = debug = True

# Project home page; written as a comment into every generated tool.
toolFactoryURL = "https://github.com/fubar2/toolfactory"

# Placeholder executable handed to galaxyxml and stripped again from the
# exported XML. Needed until a PR/version bump fixes galaxyxml prepending
# the exe even with override.
FAKEEXE = "~~~REMOVE~~~ME~~~"
47
48
def timenow():
    """Return the current local time as a ``DD/MM/YYYY HH:MM:SS`` string."""
    stamp_format = "%d/%m/%Y %H:%M:%S"
    local_now = time.localtime(time.time())
    return time.strftime(stamp_format, local_now)
52
53
# Characters that get a backslash put in front of them by cheetah_escape().
cheetah_escape_table = {"$": "\\$", "#": "\\#"}


def cheetah_escape(text):
    """Return text with every ``$`` and ``#`` replaced by its escaped form."""
    substitute = cheetah_escape_table.get
    return "".join(substitute(char, char) for char in text)
60
61
def parse_citations(citations_text):
    """Split ``**ENTRY**`` delimited citation text into (kind, value) tuples.

    Each non-blank entry becomes ``("doi", value)`` when it starts with
    ``doi`` and ``("bibtex", value)`` otherwise. The leading ``doi`` or
    ``bibtex`` keyword and surrounding whitespace are removed from the value.

    Entries are stripped before the prefix test so that whitespace or a
    newline following the ``**ENTRY**`` marker does not turn a doi into a
    bibtex entry, and a ``bibtex`` prefix is only removed when it is
    actually present.
    """
    citation_tuples = []
    for entry in citations_text.split("**ENTRY**"):
        entry = entry.strip()
        if not entry:
            continue
        if entry.startswith("doi"):
            citation_tuples.append(("doi", entry[len("doi"):].strip()))
        elif entry.startswith("bibtex"):
            citation_tuples.append(("bibtex", entry[len("bibtex"):].strip()))
        else:
            # No recognised keyword: keep the whole entry as bibtex text.
            citation_tuples.append(("bibtex", entry))
    return citation_tuples
72
73
74 class Tool_Factory:
75 """Wrapper for an arbitrary script
76 uses galaxyxml
77
78 """
79
def __init__(self, args=None):  # noqa
    """
    prepare command line cl for running the tool here
    and prepare elements needed for galaxyxml tool generation

    args is the parsed command line namespace. Its collection,
    input_files, output_files, additional_parameters and
    selecttext_parameters lists hold JSON strings which are decoded here.

    Raises AssertionError when no output file or collection is specified
    or when args.parampass is not "0", "positional" or "argparse".
    """

    def load_json_list(raw, what):
        """Decode every entry of raw longer than one character as JSON.

        On failure the malformed-parameter warning is printed and an empty
        list is returned, so the attribute always exists for the code that
        follows instead of failing later with AttributeError.
        """
        try:
            return [json.loads(x) for x in raw if len(x.strip()) > 1]
        except Exception:
            print(f"{what} {raw} is malformed - should be a dictionary")
            return []

    def read_lines(path):
        """Return the lines of the file at path with trailing whitespace removed."""
        with open(path, "r") as handle:
            return [x.rstrip() for x in handle.readlines()]

    self.local_tools = os.path.join(args.galaxy_root, "local_tools")
    self.ourcwd = os.getcwd()
    self.collections = []
    if len(args.collection) > 0:
        self.collections = load_json_list(
            args.collection, "--collections parameter"
        )
    self.infiles = load_json_list(args.input_files, "--input_files parameter")
    self.outfiles = load_json_list(args.output_files, "--output_files parameter")
    assert (
        len(self.outfiles) + len(self.collections)
    ) > 0, "No outfiles or output collections specified. The Galaxy job runner will fail without an output of some sort"
    self.addpar = load_json_list(
        args.additional_parameters, "--additional_parameters"
    )
    self.selpar = load_json_list(
        args.selecttext_parameters, "--selecttext_parameters"
    )
    self.args = args
    self.cleanuppar()
    self.lastxclredirect = None
    self.xmlcl = []
    self.is_positional = self.args.parampass == "positional"
    # The executable is the system executable if given, otherwise the name
    # of the first listed package.
    if self.args.sysexe:
        if " " in self.args.sysexe:
            self.executeme = shlex.split(self.args.sysexe)
        else:
            self.executeme = [self.args.sysexe]
    elif self.args.packages:
        self.executeme = [
            self.args.packages.split(",")[0].split(":")[0].strip(),
        ]
    else:
        self.executeme = None
    aXCL = self.xmlcl.append
    assert args.parampass in [
        "0",
        "argparse",
        "positional",
    ], 'args.parampass must be "0","positional" or "argparse"'
    # Only letters, digits and underscores survive in the tool name/id.
    self.tool_name = re.sub("[^a-zA-Z0-9_]+", "", args.tool_name)
    self.tool_id = self.tool_name
    self.newtool = gxt.Tool(
        self.tool_name,
        self.tool_id,
        self.args.tool_version,
        self.args.tool_desc,
        FAKEEXE,
    )
    self.tooloutdir = "./tfout"
    self.repdir = "./toolgen"
    self.newtarpath = args.untested_tool_out
    self.testdir = os.path.join(self.tooloutdir, "test-data")
    for needed in (self.tooloutdir, self.testdir, self.repdir):
        os.makedirs(needed, exist_ok=True)
    self.tlog = os.path.join(self.repdir, "%s_TF_run_log.txt" % self.tool_name)
    self.tinputs = gxtp.Inputs()
    self.toutputs = gxtp.Outputs()
    self.testparam = []
    if self.args.script_path:
        self.prepScript()
    if self.args.command_override:
        self.command_override = read_lines(self.args.command_override)
    else:
        self.command_override = None
    if self.args.test_override:
        self.test_override = read_lines(self.args.test_override)
    else:
        self.test_override = None
    # executeme is None when neither sysexe nor packages were supplied;
    # there is then nothing to put at the front of the command line.
    for ex in self.executeme or []:
        aXCL(ex)
    if self.args.script_path:
        aXCL("$runme")

    if self.args.parampass == "0":
        self.clsimple()
    elif self.args.parampass == "positional":
        self.prepclpos()
        self.clpositional()
    else:
        self.prepargp()
        self.clargparse()
207
def clsimple(self):
    """Build the command line when there are no parameters or repeats.

    The first input file, if any, is redirected in with ``<`` and the first
    output file, if any, is redirected out with ``>``. A user supplied
    command line suffix is shell-split and appended last.
    """
    emit = self.xmlcl.extend
    if len(self.infiles) > 0:
        emit(["<", "$%s" % self.infiles[0]["infilename"]])
    if len(self.outfiles) > 0:
        emit([">", "$%s" % self.outfiles[0]["name"]])
    suffix = self.args.cl_user_suffix
    if suffix:  # DIY CL end
        emit(shlex.split(suffix))
221
def prepargp(self):
    """Collect the argparse style command line pieces into self.xclsuffix.

    Every piece is a ``[name, value, override]`` list. An input read from
    STDIN gets a ``< $name`` override, repeated inputs and parameters get a
    Cheetah ``#for`` loop override, and an output written to STDOUT is not
    listed but recorded in self.lastxclredirect instead.
    """

    def repeat_loop(name):
        # Cheetah loop emitting one --name option per repeat element.
        return f'#for $rep in $R_{name}:\n--{name} "$rep.{name}"\n#end for'

    pieces = []
    for infile in self.infiles:
        name = infile["infilename"]
        if infile["origCL"].strip().upper() == "STDIN":
            pieces.append([name, name, "< $%s" % name])
            continue
        override = repeat_loop(name) if infile["repeat"] == "1" else ""
        pieces.append([infile["CL"], "$%s" % infile["CL"], override])
    for outfile in self.outfiles:
        if outfile["origCL"].strip().upper() == "STDOUT":
            self.lastxclredirect = [">", "$%s" % outfile["name"]]
            continue
        pieces.append([outfile["name"], "$%s" % outfile["name"], ""])
    for param in self.addpar:
        name = param["name"]
        if param["repeat"] == "1":
            override = repeat_loop(name)
        else:
            override = param["override"]
        pieces.append([param["CL"], '"$%s"' % name, override])
    for choice in self.selpar:
        pieces.append([choice["CL"], '"$%s"' % choice["name"], choice["override"]])
    self.xclsuffix = pieces
255
def prepclpos(self):
    """Collect the positional command line pieces into self.xclsuffix.

    Every piece is a ``[ordinal, value, override]`` list and the result is
    ordered by ordinal. An input read from STDIN is given ordinal "999",
    and an output written to STDOUT is not listed but recorded in
    self.lastxclredirect instead.

    The ordinals are strings, so they are sorted on their integer value;
    a plain list sort would place "10" before "2". Pieces with the same
    ordinal keep the order in which they were collected.
    """

    def ordinal(piece):
        # Non-numeric ordinals sort after every numeric one.
        text = str(piece[0]).strip()
        return int(text) if text.isdigit() else float("inf")

    xclsuffix = []
    for p in self.infiles:
        if p["origCL"].strip().upper() == "STDIN":
            xappendme = [
                "999",
                p["infilename"],
                "< $%s" % p["infilename"],
            ]
        else:
            xappendme = [p["CL"], "$%s" % p["infilename"], ""]
        xclsuffix.append(xappendme)
    for p in self.outfiles:
        if p["origCL"].strip().upper() == "STDOUT":
            self.lastxclredirect = [">", "$%s" % p["name"]]
        else:
            xclsuffix.append([p["CL"], "$%s" % p["name"], ""])
    for p in self.addpar:
        nam = p["name"]
        if p["repeat"] == "1":  # repeats make NO sense
            print(
                f"### warning. Repeats for {nam} ignored - not permitted in positional parameter command lines!"
            )
        xclsuffix.append([p["CL"], '"$%s"' % nam, p["override"]])
    for p in self.selpar:
        xclsuffix.append([p["CL"], '"$%s"' % p["name"], p["override"]])
    xclsuffix.sort(key=ordinal)
    self.xclsuffix = xclsuffix
286
def prepScript(self):
    """Read the supplied script and store the forms needed later.

    Sets self.script (lines joined after stripping trailing whitespace),
    self.sfile (path of a temporary copy of the script), self.spacedScript
    (non-blank lines prefixed with a space, used in the help text) and
    self.escapedScript (lines wrapped in Cheetah ``#raw``/``#end raw``).
    A copy named ``<tool_name>.<executable>`` is written to the current
    directory.

    Raises AssertionError if the script has no non-blank lines.
    """
    with open(self.args.script_path, "r") as source:
        rx = [x.rstrip() for x in source.readlines()]
    rxcheck = [x.strip() for x in rx if x.strip() > ""]
    assert len(rxcheck) > 0, "Supplied script is empty. Cannot run"
    self.script = "\n".join(rx)
    fhandle, self.sfile = tempfile.mkstemp(
        prefix=self.tool_name, suffix="_%s" % (self.executeme[0])
    )
    # mkstemp returns an open OS-level descriptor; write through it so it
    # is closed rather than leaked.
    with os.fdopen(fhandle, "w") as tscript:
        tscript.write(self.script)
    self.spacedScript = [f" {x}" for x in rx if x.strip() > ""]
    rx.insert(0, "#raw")
    rx.append("#end raw")
    self.escapedScript = rx
    art = "%s.%s" % (self.tool_name, self.executeme[0])
    with open(art, "wb") as artifact:
        artifact.write(bytes(self.script, "utf8"))
307
def cleanuppar(self):
    """Validate positional ordinals and normalise the parameter dictionaries.

    In positional mode every input, output and additional parameter must
    have a digit-only CL (inputs may instead say STDIN, outputs STDOUT);
    otherwise an AssertionError is raised. Each dictionary is then replaced
    by a shallow copy carrying the original CL as "origCL". Inputs also get
    "infilename": the label with spaces turned into underscores for
    positional and "0" modes, the CL otherwise.
    """
    mode = self.args.parampass
    complaint = "Positional parameters must be ordinal integers - got %s for %s"
    if mode == "positional":
        for spec in self.infiles:
            assert (
                spec["CL"].isdigit() or spec["CL"].strip().upper() == "STDIN"
            ), complaint % (spec["CL"], spec["label"])
        for spec in self.outfiles:
            assert (
                spec["CL"].isdigit() or spec["CL"].strip().upper() == "STDOUT"
            ), complaint % (spec["CL"], spec["name"])
        for spec in self.addpar:
            assert spec["CL"].isdigit(), complaint % (spec["CL"], spec["name"])
    named_by_label = mode in ["positional", "0"]
    for position, spec in enumerate(self.infiles):
        fresh = copy.copy(spec)
        fresh["origCL"] = fresh["CL"]
        if named_by_label:
            fresh["infilename"] = fresh["label"].replace(" ", "_")
        else:
            fresh["infilename"] = fresh["CL"]
        self.infiles[position] = fresh
    for group in (self.outfiles, self.addpar):
        for position, spec in enumerate(group):
            fresh = copy.copy(spec)
            fresh["origCL"] = fresh["CL"]  # keep copy
            group[position] = fresh
348
def clpositional(self):
    """Append the positional command line to self.xmlcl.

    Values from self.xclsuffix come first, in their stored order, followed
    by any stdout redirect and finally the shell-split user suffix.
    """
    # inputs in order then params
    command = self.xmlcl
    command.extend(value for (_name, value, _override) in self.xclsuffix)
    if self.lastxclredirect:
        command.extend(self.lastxclredirect)
    if self.args.cl_user_suffix:  # DIY CL end
        command.extend(shlex.split(self.args.cl_user_suffix))
361
def clargparse(self):
    """Append the argparse style command line to self.xmlcl.

    A piece with a non-empty override contributes only that override.
    Otherwise the name is emitted with one dash when it is a single
    character and two dashes when longer, followed by the value. Any
    stdout redirect and the shell-split user suffix come last.
    """
    command = self.xmlcl
    # inputs then params in argparse named form
    for (name, value, override) in self.xclsuffix:
        if override > "":
            command.append(override)
            continue
        dashes = "-" if len(name.strip()) == 1 else "--"
        command.append("%s%s" % (dashes, name))
        command.append(value)
    if self.lastxclredirect:
        command.extend(self.lastxclredirect)
    if self.args.cl_user_suffix:  # DIY CL end
        command.extend(shlex.split(self.args.cl_user_suffix))
385
def getNdash(self, newname):
    """Return how many dashes precede newname on the command line.

    Positional tools use none; otherwise names shorter than two characters
    get one dash and all others get two.
    """
    if self.is_positional:
        return 0
    return 1 if len(newname) < 2 else 2
394
def doXMLparam(self):  # noqa
    """Add all needed elements to tool

    Builds the galaxyxml output, input, additional parameter, select
    parameter and output collection elements from self.outfiles,
    self.infiles, self.addpar, self.selpar and self.collections, appending
    them to self.toutputs / self.tinputs and the matching test elements to
    self.testparam.

    An output's "test" entry is "diff[:lines]", "sim_size[:delta]" or any
    other compare type; a missing or non-numeric diff line count means 0
    and a missing sim_size delta means no delta is set.

    Raises ValueError for an unrecognised additional or select parameter
    type.
    """

    def output_test(name, spec):
        """Return the TestOutput element for output name from its test spec."""
        sample = "%s_sample" % name
        fields = spec.split(":")
        arg = fields[1].strip() if len(fields) > 1 else ""
        if spec.startswith("diff"):
            lines_diff = int(arg) if arg.isdigit() else 0
            return gxtp.TestOutput(
                name=name, value=sample, compare="diff", lines_diff=lines_diff
            )
        if spec.startswith("sim_size"):
            # Start from None so an absent value cannot reuse the values
            # computed for a previous output.
            delta = None
            delta_frac = None
            if arg > "":
                if "." in arg:
                    delta_frac = min(1.0, float(arg))
                else:
                    delta = int(arg)
            return gxtp.TestOutput(
                name=name,
                value=sample,
                compare="sim_size",
                delta=delta,
                delta_frac=delta_frac,
            )
        return gxtp.TestOutput(name=name, value=sample, compare=spec)

    def register_input(param, name, label, test_value, repeated):
        """Add param and its test element, wrapped in a repeat when asked."""
        if repeated:
            repe = gxtp.Repeat(
                name=f"R_{name}", title=f"Add as many {label} as needed"
            )
            repe.append(param)
            self.tinputs.append(repe)
            tparm = gxtp.TestRepeat(name=f"R_{name}")
            tparm.append(gxtp.TestParam(name, value=test_value))
        else:
            self.tinputs.append(param)
            tparm = gxtp.TestParam(name, value=test_value)
        self.testparam.append(tparm)

    for p in self.outfiles:
        newname = p["name"]
        newfmt = p["format"]
        newcl = p["CL"]
        oldcl = p["origCL"]
        test = p["test"].strip()
        ndash = self.getNdash(newcl)
        aparm = gxtp.OutputData(
            name=newname, format=newfmt, num_dashes=ndash, label=newname
        )
        aparm.positional = self.is_positional
        if self.is_positional:
            if oldcl.upper() == "STDOUT":
                aparm.positional = 9999999
                aparm.command_line_override = "> $%s" % newname
            else:
                aparm.positional = int(oldcl)
                aparm.command_line_override = "$%s" % newname
        self.toutputs.append(aparm)
        if test > "":
            self.testparam.append(output_test(newname, test))

    for p in self.infiles:
        newname = p["infilename"]
        newfmt = p["format"]
        ndash = self.getNdash(newname)
        reps = p.get("repeat", "0") == "1"
        alab = p["label"] if len(p["label"]) > 0 else p["CL"]
        aninput = gxtp.DataParam(
            newname,
            optional=False,
            label=alab,
            help=p["help"],
            format=newfmt,
            multiple=False,
            num_dashes=ndash,
        )
        aninput.positional = self.is_positional
        if self.is_positional:
            if p["origCL"].upper() == "STDIN":
                aninput.positional = 9999998
                aninput.command_line_override = "< $%s" % newname
            else:
                aninput.positional = int(p["origCL"])
                aninput.command_line_override = "$%s" % newname
        register_input(aninput, newname, alab, "%s_sample" % newname, reps)

    simple_params = {
        "text": gxtp.TextParam,
        "integer": gxtp.IntegerParam,
        "float": gxtp.FloatParam,
        "boolean": gxtp.BooleanParam,
    }
    for p in self.addpar:
        newname = p["name"]
        newval = p["value"]
        newlabel = p["label"]
        newhelp = p["help"]
        newtype = p["type"]
        oldcl = p["origCL"]
        reps = p["repeat"] == "1"
        if not len(newlabel) > 0:
            newlabel = newname
        ndash = self.getNdash(newname)
        if newtype not in simple_params:
            raise ValueError(
                'Unrecognised parameter type "%s" for additional parameter %s in makeXML'
                % (newtype, newname)
            )
        aparm = simple_params[newtype](
            newname,
            label=newlabel,
            help=newhelp,
            value=newval,
            num_dashes=ndash,
        )
        aparm.positional = self.is_positional
        if self.is_positional:
            aparm.positional = int(oldcl)
        register_input(aparm, newname, newlabel, newval, reps)

    for p in self.selpar:
        newname = p["name"]
        newval = p["value"]
        newlabel = p["label"]
        newhelp = p["help"]
        newtype = p["type"]
        newcl = p["CL"]
        if not len(newlabel) > 0:
            newlabel = newname
        ndash = self.getNdash(newname)
        if newtype != "selecttext":
            raise ValueError(
                'Unrecognised parameter type "%s" for selecttext parameter %s in makeXML'
                % (newtype, newname)
            )
        newtext = p["texts"]
        aparm = gxtp.SelectParam(
            newname,
            label=newlabel,
            help=newhelp,
            num_dashes=ndash,
        )
        # One option per value, labelled with the text at the same index.
        for i in range(len(newval)):
            aparm.append(gxtp.SelectOption(value=newval[i], text=newtext[i]))
        aparm.positional = self.is_positional
        if self.is_positional:
            aparm.positional = int(newcl)
        self.tinputs.append(aparm)
        self.testparam.append(gxtp.TestParam(newname, value=newval))

    for p in self.collections:
        newkind = p["kind"]
        newname = p["name"]
        newlabel = p["label"]
        newdisc = p["discover"]
        collect = gxtp.OutputCollection(newname, label=newlabel, type=newkind)
        disc = gxtp.DiscoverDatasets(
            pattern=newdisc, directory=f"{newname}", visible="false"
        )
        collect.append(disc)
        self.toutputs.append(collect)
        try:
            tparm = gxtp.TestOutputCollection(newname)  # broken until PR merged.
            self.testparam.append(tparm)
        except Exception:
            print(
                "#### WARNING: Galaxyxml version does not have the PR merged yet - tests for collections must be over-ridden until then!"
            )
616
def doNoXMLparam(self):
    """Describe a filter style tool that reads stdin and writes stdout.

    At most one input is allowed (AssertionError otherwise). The first
    input becomes a data parameter redirected in with ``<`` and the first
    output a data output redirected out with ``>``; each also gets a test
    element expecting a ``<name>_sample`` file.
    """
    if len(self.infiles) > 0:
        first_in = self.infiles[0]
        label = first_in["label"]
        if len(label) == 0:
            label = first_in["infilename"]
        too_many = (
            "Maximum one input if parampass is 0 but multiple input files supplied - %s"
            % str(self.infiles)
        )
        assert len(self.infiles) == 1, too_many
        in_name = first_in["infilename"]
        data_in = gxtp.DataParam(
            in_name,
            optional=False,
            label=label,
            help=first_in["help"],
            format=first_in["format"],
            multiple=False,
            num_dashes=0,
        )
        data_in.command_line_override = "< $%s" % in_name
        data_in.positional = True
        self.tinputs.append(data_in)
        self.testparam.append(
            gxtp.TestParam(name=in_name, value="%s_sample" % in_name)
        )
    if len(self.outfiles) > 0:
        first_out = self.outfiles[0]
        out_name = first_out["name"]
        data_out = gxtp.OutputData(out_name, format=first_out["format"], num_dashes=0)
        data_out.command_line_override = "> $%s" % out_name
        data_out.positional = self.is_positional
        self.toutputs.append(data_out)
        self.testparam.append(
            gxtp.TestOutput(name=out_name, value="%s_sample" % out_name)
        )
652
def makeXML(self):  # noqa
    """
    Create a Galaxy xml tool wrapper for the new script

    Fills in command line, citation, help text, version command, stdio,
    requirements, inputs/outputs, the script configfile and a test on
    self.newtool, exports it with galaxyxml and writes
    ``<tool_name>.xml`` to the current directory. A supplied test override
    replaces the generated ``<tests>`` section in the exported text.

    Exits the process with status 2 if the packages string cannot be parsed.
    """
    if self.command_override:
        self.newtool.command_override = self.command_override  # config file
    else:
        self.newtool.command_override = self.xmlcl
    cite = gxtp.Citations()
    acite = gxtp.Citation(type="doi", value="10.1093/bioinformatics/bts573")
    cite.append(acite)
    self.newtool.citations = cite
    safertext = ""
    if self.args.help_text:
        # Read inside a context manager so the handle is always closed.
        with open(self.args.help_text, "r") as helpfile:
            helptext = helpfile.readlines()
        safertext = "\n".join([cheetah_escape(x) for x in helptext])
    if len(safertext.strip()) == 0:
        safertext = (
            "Ask the tool author (%s) to rebuild with help text please\n"
            % (self.args.user_email)
        )
    if self.args.script_path:
        if len(safertext) > 0:
            safertext = safertext + "\n\n------\n"  # transition allowed!
        scr = [x for x in self.spacedScript if x.strip() > ""]
        scr.insert(0, "\n\nScript::\n")
        if len(scr) > 300:
            # Long scripts are abbreviated to their first and last 100 lines.
            scr = (
                scr[:100]
                + [" >300 lines - stuff deleted", " ......"]
                + scr[-100:]
            )
        scr.append("\n")
        safertext = safertext + "\n".join(scr)
    self.newtool.help = safertext
    self.newtool.version_command = f'echo "{self.args.tool_version}"'
    std = gxtp.Stdios()
    std1 = gxtp.Stdio()
    std.append(std1)
    self.newtool.stdios = std
    requirements = gxtp.Requirements()
    self.condaenv = []
    if self.args.packages:
        try:
            for d in self.args.packages.split(","):
                ver = None
                packg = None
                # Accept name==version, name=version and name:version.
                d = d.replace("==", ":")
                d = d.replace("=", ":")
                if ":" in d:
                    packg, ver = d.split(":")
                    ver = ver.strip()
                    packg = packg.strip()
                else:
                    packg = d.strip()
                    ver = None
                if ver == "":
                    ver = None
                if packg:
                    requirements.append(
                        gxtp.Requirement("package", packg.strip(), ver)
                    )
                    self.condaenv.append(d)
        except Exception:
            print(
                "### malformed packages string supplied - cannot parse =",
                self.args.packages,
            )
            sys.exit(2)
    self.newtool.requirements = requirements
    if self.args.parampass == "0":
        self.doNoXMLparam()
    else:
        self.doXMLparam()
    self.newtool.outputs = self.toutputs
    self.newtool.inputs = self.tinputs
    if self.args.script_path:
        configfiles = gxtp.Configfiles()
        configfiles.append(
            gxtp.Configfile(name="runme", text="\n".join(self.escapedScript))
        )
        self.newtool.configfiles = configfiles
    tests = gxtp.Tests()
    test_a = gxtp.Test()
    for tp in self.testparam:
        test_a.append(tp)
    tests.append(test_a)
    self.newtool.tests = tests
    self.newtool.add_comment(
        "Created by %s at %s using the Galaxy Tool Factory."
        % (self.args.user_email, timenow())
    )
    self.newtool.add_comment("Source in git at: %s" % (toolFactoryURL))
    exml0 = self.newtool.export()
    exml = exml0.replace(FAKEEXE, "")  # temporary work around until PR accepted
    if (
        self.test_override
    ):  # cannot do this inside galaxyxml as it expects lxml objects for tests
        part1 = exml.split("<tests>")[0]
        part2 = exml.split("</tests>")[1]
        exml = "%s\n%s\n%s" % (part1, "\n".join(self.test_override), part2)
    with open("%s.xml" % self.tool_name, "w") as xf:
        xf.write(exml)
        xf.write("\n")
    # galaxy history item
761
def writeShedyml(self):
    """for planemo

    Writes ``.shed.yml`` into the tool output directory. The owner is the
    part of the user email before the ``@``.
    """
    yuser = self.args.user_email.split("@")[0]
    yfname = os.path.join(self.tooloutdir, ".shed.yml")
    odict = {
        "name": self.tool_name,
        "owner": yuser,
        "type": "unrestricted",
        "description": self.args.tool_desc,
        "synopsis": self.args.tool_desc,
        "category": "TF Generated Tools",
    }
    # The context manager closes the file even if yaml.dump raises.
    with open(yfname, "w") as yamlf:
        yaml.dump(odict, yamlf, allow_unicode=True)
777
def makeTool(self):
    """write xmls and input samples into place

    Generates the tool XML, copies it into the tool output and report
    directories, copies every input file as ``<infilename>_sample`` into
    the test-data directory (and with its format as extension into the
    report directory), then copies the whole tool output directory to
    ``<local_tools>/<tool_name>``.
    """
    # parampass is a string, so the former ``== 0`` test could never be
    # true and makeXML() always ran; makeXML() already chooses between
    # doNoXMLparam() and doXMLparam() itself.
    self.makeXML()
    if self.args.script_path:
        # NOTE(review): when self.sfile is an absolute path this join yields
        # self.sfile itself, so the copy is skipped - confirm that is intended.
        stname = os.path.join(self.tooloutdir, self.sfile)
        if not os.path.exists(stname):
            shutil.copyfile(self.sfile, stname)
    xreal = "%s.xml" % self.tool_name
    for folder in (self.tooloutdir, self.repdir):
        shutil.copyfile(xreal, os.path.join(folder, xreal))
    for p in self.infiles:
        pth = p["name"]
        dest = os.path.join(self.testdir, "%s_sample" % p["infilename"])
        shutil.copyfile(pth, dest)
        dest = os.path.join(
            self.repdir, "%s_sample.%s" % (p["infilename"], p["format"])
        )
        shutil.copyfile(pth, dest)
    dest = os.path.join(self.local_tools, self.tool_name)
    shutil.copytree(self.tooloutdir, dest, dirs_exist_ok=True)
803
def makeToolTar(self, report_fail=False):
    """move outputs into test-data and prepare the tarball

    For every output without a ``<name>_sample`` file in the test-data
    directory, the output found there is copied to that name and to
    ``<name>.sample.<format>`` in the report directory; with report_fail
    set, a missing output is reported on stdout. The tool output directory
    is then archived to self.newtarpath (skipping planemo test reports) and
    the finished archive is copied into the tool output directory as
    ``<tool_name>_untested_toolshed.gz``.
    """
    excludeme = "_planemo_test_report.html"

    def exclude_function(tarinfo):
        filename = tarinfo.name
        return None if filename.endswith(excludeme) else tarinfo

    for p in self.outfiles:
        oname = p["name"]
        tdest = os.path.join(self.testdir, "%s_sample" % oname)
        src = os.path.join(self.testdir, oname)
        if os.path.isfile(tdest):
            continue
        if os.path.isfile(src):
            shutil.copyfile(src, tdest)
            dest = os.path.join(self.repdir, "%s.sample.%s" % (oname, p["format"]))
            shutil.copyfile(src, dest)
        elif report_fail:
            print(
                "###Tool may have failed - output file %s not found in testdir after planemo run %s."
                % (tdest, self.testdir)
            )
    with tarfile.open(self.newtarpath, "w:gz") as tf:
        tf.add(
            name=self.tooloutdir,
            arcname=self.tool_name,
            filter=exclude_function,
        )
    # Copy only after the archive is closed; copying earlier captures an
    # unfinished gzip stream.
    shutil.copy(
        self.newtarpath,
        os.path.join(self.tooloutdir, f"{self.tool_name}_untested_toolshed.gz"),
    )
835
836
def planemo_test_update(self):
    """planemo is a requirement so is available for testing

    Runs ``planemo test --update_test_data`` on the generated tool XML from
    inside the tool output directory, appending planemo's stdout and stderr
    to the run log, and returns planemo's exit code.
    """
    xreal = "%s.xml" % self.tool_name
    tool_test_path = os.path.join(
        self.repdir, f"{self.tool_name}_planemo_test_report.html"
    )
    cll = [
        "planemo",
        "test",
        "--conda_auto_init",
        "--biocontainers",
        "--test_data",
        os.path.abspath(self.testdir),
        "--test_output",
        os.path.abspath(tool_test_path),
        "--galaxy_root",
        self.args.galaxy_root,
        "--update_test_data",
        os.path.abspath(xreal),
    ]
    # Append mode creates the log when it does not exist yet, and the
    # context manager closes it even if planemo cannot be started.
    with open(self.tlog, "a") as tout:
        p = subprocess.run(
            cll,
            shell=False,
            cwd=self.tooloutdir,
            stderr=tout,
            stdout=tout,
        )
    return p.returncode
871
872
def update_toolconf(self):
    """Register the generated tool in Galaxy's config/local_tool_conf.xml.

    The tool XML path under self.local_tools is added to the first
    ``<section>`` of the file (a "Local Tools" section is created at the
    top when there is none) unless it is already listed. The section's
    children are then sorted by their ``file`` attribute and the file is
    rewritten.
    """

    def sortchildrenby(parent, attr):
        # Children without the attribute (comments, for example) sort first
        # instead of making the comparison fail on None.
        parent[:] = sorted(parent, key=lambda child: child.get(attr) or "")

    tcpath = os.path.join(self.args.galaxy_root, "config/local_tool_conf.xml")
    xmlfile = os.path.join(
        self.local_tools, self.tool_name, "%s.xml" % self.tool_name
    )
    parser = ET.XMLParser(remove_blank_text=True)
    tree = ET.parse(tcpath, parser)
    root = tree.getroot()
    sections = root.findall("section")
    if len(sections) > 0:
        TFsection = sections[0]
    else:
        TFsection = ET.Element(
            "section", {"id": "localtools", "name": "Local Tools"}
        )
        root.insert(0, TFsection)  # at the top!
    conf_tools = [x.get("file") for x in TFsection.findall("tool")]
    if xmlfile not in conf_tools:  # new
        ET.SubElement(TFsection, "tool", {"file": xmlfile})
    sortchildrenby(TFsection, "file")
    tree.write(tcpath, pretty_print=True)
897
898
899
900
901
902
def shedLoad(self):
    """
    use bioblend to create new repository
    or update existing

    The repository named after the tool is created when the toolshed does
    not list it yet; the tarball at self.newtarpath is then uploaded to it.
    Progress is appended to the run log.
    """
    # Imported here because the module only imports bioblend.galaxy.
    from bioblend import ConnectionError as BioblendConnectionError
    from bioblend import toolshed

    # Append mode creates the log when it does not exist yet.
    with open(self.tlog, "a") as sto:
        ts = toolshed.ToolShedInstance(
            url=self.args.toolshed_url,
            key=self.args.toolshed_api_key,
            verify=False,
        )
        repos = ts.repositories.get_repositories()
        rnames = [x.get("name", "?") for x in repos]
        rids = [x.get("id", "?") for x in repos]
        tfcat = "ToolFactory generated tools"
        if self.tool_name not in rnames:
            tscat = ts.categories.get_categories()
            cnames = [x.get("name", "?").strip() for x in tscat]
            cids = [x.get("id", "?") for x in tscat]
            catID = None
            if tfcat.strip() in cnames:
                ci = cnames.index(tfcat)
                catID = cids[ci]
            # NOTE(review): the lookup above uses the sanitised
            # self.tool_name but the repository is created with the raw
            # args.tool_name - confirm these always agree.
            res = ts.repositories.create_repository(
                name=self.args.tool_name,
                synopsis="Synopsis:%s" % self.args.tool_desc,
                description=self.args.tool_desc,
                type="unrestricted",
                remote_repository_url=self.args.toolshed_url,
                homepage_url=None,
                category_ids=catID,
            )
            tid = res.get("id", None)
            sto.write(
                f"#create_repository {self.args.tool_name} tid={tid} res={res}\n"
            )
        else:
            i = rnames.index(self.tool_name)
            tid = rids[i]
        try:
            res = ts.repositories.update_repository(
                id=tid, tar_ball_path=self.newtarpath, commit_message=None
            )
            # Log the repository id, not the builtin ``id`` function.
            sto.write(f"#update res id {tid} ={res}\n")
        except (ConnectionError, BioblendConnectionError):
            sto.write(
                "####### Is the toolshed running and the API key correct? Bioblend shed upload failed\n"
            )
955
def eph_galaxy_load(self):
    """
    use ephemeris to load the new tool from the local toolshed after planemo uploads it

    Runs ``shed-tools install`` for this tool, appending the command, its
    output and its exit code to the run log, and returns the exit code.
    """
    cll = [
        "shed-tools",
        "install",
        "-g",
        self.args.galaxy_url,
        "--latest",
        "-a",
        self.args.galaxy_api_key,
        "--name",
        self.tool_name,
        "--owner",
        "fubar",
        "--toolshed",
        self.args.toolshed_url,
        "--section_label",
        "ToolFactory",
    ]
    # Fall back to None, which makes the child inherit the current
    # environment, when no self.ourenv has been prepared.
    run_env = getattr(self, "ourenv", None)
    # Append mode creates the log when it does not exist yet.
    with open(self.tlog, "a") as tout:
        tout.write("running\n%s\n" % " ".join(cll))
        subp = subprocess.run(
            cll,
            env=run_env,
            cwd=self.ourcwd,
            shell=False,
            stderr=tout,
            stdout=tout,
        )
        tout.write(
            "installed %s - got retcode %d\n" % (self.tool_name, subp.returncode)
        )
    return subp.returncode
995
def planemo_biodocker_test(self):
    """planemo currently leaks dependencies if used in the same container and gets unhappy after a
    first successful run. https://github.com/galaxyproject/planemo/issues/1078#issuecomment-731476930

    Docker biocontainer has planemo with caches filled to save repeated downloads

    The tool output directory is copied into a fresh volume of a detached
    planemo container. planemo is run once to update the test data and once
    more to test; the results are then copied back and the container,
    volume and temporary directory are removed.

    NOTE(review): ``docker`` is not among the imports visible at the top of
    this file and ``copy_to_container`` / ``copy_from_container`` are not
    defined in the part reviewed - confirm they are provided elsewhere.
    """

    def prun(container, tout, cl, user="biodocker"):
        # Run cl in the container and log the result on separate lines.
        rlog = container.exec_run(cl, user=user)
        slogl = str(rlog).split("\\n")
        slog = "\n".join(slogl)
        tout.write(f"## got rlog {slog} from {cl}\n")

    planemoimage = "quay.io/fubar2/planemo-biocontainer"
    xreal = "%s.xml" % self.tool_name
    repname = f"{self.tool_name}_planemo_test_report.html"
    ptestrep_path = os.path.join(self.repdir, repname)
    destdir = "/toolfactory/ptest"
    imrep = os.path.join(destdir, repname)
    ptestpath = os.path.join(destdir, "tfout", xreal)
    # Append mode creates the log when it does not exist yet, and the
    # context manager closes it even if a docker call raises.
    with open(self.tlog, "a") as tout:
        client = docker.from_env()
        tvol = client.volumes.create()
        tvolname = tvol.name
        # need to keep the container running so keep it open with sleep
        # will stop and destroy it when we are done
        container = client.containers.run(
            planemoimage,
            "sleep 120m",
            detach=True,
            user="biodocker",
            volumes={f"{tvolname}": {"bind": "/toolfactory", "mode": "rw"}},
        )
        cl = f"mkdir -p {destdir}"
        prun(container, tout, cl, user="root")
        # that's how hard it is to get root on a biodocker container :(
        cl = f"rm -rf {destdir}/*"
        prun(container, tout, cl, user="root")
        self.copy_to_container(self.tooloutdir, destdir, container)
        cl = "chown -R biodocker /toolfactory"
        prun(container, tout, cl, user="root")
        _ = container.exec_run(f"ls -la {destdir}")
        ptestcl = f"planemo test --test_output {imrep} --update_test_data --no_cleanup --test_data {destdir}/tfout/test-data --galaxy_root /home/biodocker/galaxy-central {ptestpath}"
        try:
            _ = container.exec_run(ptestcl)
            # fails because test outputs missing but updates the test-data directory
        except Exception:
            e = sys.exc_info()[0]
            tout.write(f"#### error: {e} from {ptestcl}\n")
        cl = f"planemo test --test_output {imrep} --no_cleanup --test_data {destdir}/tfout/test-data --galaxy_root /home/biodocker/galaxy-central {ptestpath}"
        try:
            prun(container, tout, cl)
        except Exception:
            e = sys.exc_info()[0]
            # Report the command that was actually run in this step.
            tout.write(f"#### error: {e} from {cl}\n")
        testouts = tempfile.mkdtemp(suffix=None, prefix="tftemp", dir=".")
        self.copy_from_container(destdir, testouts, container)
        src = os.path.join(testouts, "ptest")
        if os.path.isdir(src):
            shutil.copytree(src, ".", dirs_exist_ok=True)
            src = repname
            if os.path.isfile(repname):
                shutil.copyfile(src, ptestrep_path)
        else:
            tout.write(f"No output from run to shutil.copytree in {src}\n")
    container.stop()
    container.remove()
    tvol.remove()
    shutil.rmtree(testouts)
1071
1072
1073 # def run(self):
1074 # """
1075 # scripts must be small enough not to fill the pipe!
1076 # """
1077 # if self.treatbashSpecial and self.opts.interpreter in ['bash','sh']:
1078 # retval = self.runBash()
1079 # else:
1080 # if self.opts.output_dir:
1081 # ste = open(self.elog,'w')
1082 # sto = open(self.tlog,'w')
1083 # sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
1084 # sto.flush()
1085 # p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=ste,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
1086 # else:
1087 # p = subprocess.Popen(self.cl,shell=False,stdin=subprocess.PIPE)
1088 # p.stdin.write(self.script)
1089 # p.stdin.close()
1090 # retval = p.wait()
1091 # if self.opts.output_dir:
1092 # sto.close()
1093 # ste.close()
1094 # err = open(self.elog,'r').read()
1095 # if retval <> 0 and err: # problem
1096 # print >> sys.stderr, '## error code %d returned with:\n%s' % (retval,err)
1097 # if self.opts.make_HTML:
1098 # self.makeHtml()
1099 # return retval
1100
1101 # def runBash(self):
1102 # """
1103 # cannot use - for bash so use self.sfile
1104 # """
1105 # if self.opts.output_dir:
1106 # s = '## Toolfactory generated command line = %s\n' % ' '.join(self.cl)
1107 # sto = open(self.tlog,'w')
1108 # sto.write(s)
1109 # sto.flush()
1110 # p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
1111 # else:
1112 # p = subprocess.Popen(self.cl,shell=False)
1113 # retval = p.wait()
1114 # if self.opts.output_dir:
1115 # sto.close()
1116 # if self.opts.make_HTML:
1117 # self.makeHtml()
1118 # return retval
1119
1120 # def make_conda_env(self, dep_list):
1121 # """
1122 # (venv) galaxy@ross-newgrt:/evol/galaxy$ mulled-hash bioblend=0.17.0,galaxyxml=0.4.14
1123 # mulled-v2-37438395e15c3d0bed4e02d66d5b05ca3d18b389:1d0b008b65909163243b3fdddd9aa20605f8a005
1124
1125 # conda create -n myenv python=3.9 scipy=0.17.3 astroid babel
1126
1127
1128 # """
1129 # dep_list.sort()
1130 # self.env_name = '-'.join(dep_list)
1131 # for e in self.xmlcl +
1132
1133 # for e in self.xclsuffix:
1134 # # xappendme = ["999", p["infilename"], "< $%s" % p["infilename"]]
1135 # else:
1136 # xappendme = [p["CL"], "$%s" % p["infilename"], ""]
1137 # xclsuffix.append(xappendme)
1138 # if os.path.exists(self.tlog):
1139 # tout = open(self.tlog, "a")
1140 # else:
1141 # tout = open(self.tlog, "w")
1142 # cli = ["conda", "create", "-n", self.env_name, ' '.join(dep_list)]
1143 # p = subprocess.run(
1144 # cll,
1145 # shell=False,
1146 # cwd=self.tooloutdir,
1147 # stderr=tout,
1148 # stdout=tout,
1149 # )
1150 # cli = ["conda", "activate", self.env_name, " && "]
1151 # cli.append(run_cmd)
1152 # tout.close()
1153
def main():
    """
    Command-line entry point for the ToolFactory Galaxy wrapper.

    It expects to be called by a special purpose tool.xml. The options are
    parsed with argparse, two sanity checks are applied, and then a
    Tool_Factory is built from the parsed arguments and its steps are run in
    order: writeShedyml, makeTool, planemo_test_update, makeToolTar and
    update_toolconf.

    Raises:
        AssertionError: if --bad_user is supplied while admin_only is set,
            or if --tool_name is empty. The checks are raised explicitly
            instead of using the assert statement so that they are still
            enforced when Python runs with -O (which strips assert
            statements); the exception type is unchanged for callers.
    """
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a("--script_path", default=None)
    a("--history_test", default=None)
    a("--cl_user_suffix", default=None)
    a("--sysexe", default=None)
    a("--packages", default=None)
    a("--tool_name", default="newtool")
    a("--tool_dir", default=None)
    a("--input_files", default=[], action="append")
    a("--output_files", default=[], action="append")
    a("--user_email", default="Unknown")
    a("--bad_user", default=None)
    a("--help_text", default=None)
    a("--tool_desc", default=None)
    a("--tool_version", default="0.01")
    a("--citations", default=None)
    a("--command_override", default=None)
    a("--test_override", default=None)
    a("--additional_parameters", action="append", default=[])
    a("--selecttext_parameters", action="append", default=[])
    a("--edit_additional_parameters", action="store_true", default=False)
    a("--parampass", default="positional")
    a("--tfout", default="./tfout")
    a("--galaxy_root", default="/galaxy-central")
    a("--galaxy_venv", default="/galaxy_venv")
    a("--collection", action="append", default=[])
    a("--include_tests", default=False, action="store_true")
    a("--install_flag", action="store_true", default=False)
    # NOTE(review): store_true combined with default=True means this option
    # is always True and cannot be switched off from the command line.
    a("--admin_only", default=True, action="store_true")
    a("--untested_tool_out", default=None)
    a("--local_tools", default="tools")  # relative to $__root_dir__
    a("--tool_conf_path", default="config/tool_conf.xml")  # relative to $__root_dir__
    args = parser.parse_args()
    # Authorisation guard: must not be an assert statement, otherwise it
    # silently disappears under "python -O" / PYTHONOPTIMIZE.
    if args.admin_only and args.bad_user:
        unauthorised = (
            "UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy "
            'admin adds %s to "admin_users" in the galaxy.yml Galaxy '
            "configuration file"
        )
        raise AssertionError(unauthorised % (args.bad_user, args.bad_user))
    if not args.tool_name:
        raise AssertionError(
            "## This ToolFactory cannot build a tool without a tool name. Please supply one."
        )
    tf = Tool_Factory(args)
    tf.writeShedyml()
    tf.makeTool()
    tf.planemo_test_update()
    tf.makeToolTar()
    tf.update_toolconf()
1207
1208
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()