comparison rgToolFactory2.py @ 25:9fe74bd23af2 draft

Uploaded
author fubar
date Mon, 02 Mar 2015 05:18:21 -0500
parents
children db35d39e1de9
comparison
equal deleted inserted replaced
24:1a4d3923aa9f 25:9fe74bd23af2
1 # rgToolFactoryMultIn.py
2 # see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
3 #
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
5 #
6 # all rights reserved
7 # Licensed under the LGPL
8 # suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
9 #
10 # January 2015
11 # unified all setups by passing the script on the cl rather than via a PIPE - no need for treat_bash_special so removed
12 #
13 # in the process of building a complex tool
14 # added ability to choose one of the current toolshed package_r or package_perl or package_python dependencies and source that package
15 # add that package to tool_dependencies
16 # Note that once the generated tool is loaded, it will have that package's env.sh loaded automagically so there is no
17 # --envshpath in the parameters for the generated tool and it uses the system one which will be first on the adjusted path.
18 #
19 # sept 2014 added additional params from
20 # https://bitbucket.org/mvdbeek/dockertoolfactory/src/d4863bcf7b521532c7e8c61b6333840ba5393f73/DockerToolFactory.py?at=default
21 # passing them is complex
22 # and they are restricted to NOT contain commas or double quotes to ensure that they can be safely passed together on
23 # the toolfactory command line as a comma delimited double quoted string for parsing and passing to the script
24 # see examples on this tool form
25
26 # august 2014
27
28 # Allows arbitrary number of input files
29 # NOTE positional parameters are now passed to script
30 # and output (may be "None") is *before* arbitrary number of inputs
31 #
32 # march 2014
33 # had to remove dependencies because cross toolshed dependencies are not possible - can't pre-specify a toolshed url for graphicsmagick and ghostscript
34 # grrrrr - night before a demo
35 # added dependencies to a tool_dependencies.xml if html page generated so generated tool is properly portable
36 #
37 # added ghostscript and graphicsmagick as dependencies
38 # fixed a weird problem where gs was trying to use the new_files_path from universe (database/tmp) as ./database/tmp
39 # errors ensued
40 #
41 # august 2013
42 # found a problem with GS if $TMP or $TEMP missing - now inject /tmp and warn
43 #
44 # july 2013
45 # added ability to combine images and individual log files into html output
46 # just make sure there's a log file foo.log and it will be output
47 # together with all images named like "foo_*.pdf"
48 # otherwise old format for html
49 #
50 # January 2013
51 # problem pointed out by Carlos Borroto
52 # added escaping for <>$ - thought I did that ages ago...
53 #
54 # August 11 2012
55 # changed to use shell=False and cl as a sequence
56
57 # This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
58 # It also serves as the wrapper for the new tool.
59 #
60 # you paste and run your script
61 # Only works for simple scripts that read one input from the history.
62 # Optionally can write one new history dataset,
63 # and optionally collect any number of outputs into links on an autogenerated HTML page.
64
65 # DO NOT install on a public or important site - please.
66
67 # installed generated tools are fine if the script is safe.
68 # They just run normally and their user cannot do anything unusually insecure
69 # but please, practice safe toolshed.
70 # Read the fucking code before you install any tool
71 # especially this one
72
73 # After you get the script working on some test data, you can
74 # optionally generate a toolshed compatible gzip file
75 # containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
76 # safe and largely automated installation in a production Galaxy.
77
78 # If you opt for an HTML output, you get all the script outputs arranged
79 # as a single Html history item - all output files are linked, thumbnails for all the pdfs.
80 # Ugly but really inexpensive.
81 #
82 # Patches appreciated please.
83 #
84 #
85 # long route to June 2012 product
86 # Behold the awesome power of Galaxy and the toolshed with the tool factory to bind them
87 # derived from an integrated script model
88 # called rgBaseScriptWrapper.py
89 # Note to the unwary:
90 # This tool allows arbitrary scripting on your Galaxy as the Galaxy user
91 # There is nothing stopping a malicious user doing whatever they choose
92 # Extremely dangerous!!
93 # Totally insecure. So, trusted users only
94 #
95 # preferred model is a developer using their throw away workstation instance - ie a private site.
96 # no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
97 #
98
99 import sys
100 import shutil
101 import subprocess
102 import os
103 import time
104 import tempfile
105 import optparse
106 import tarfile
107 import re
108 import shutil
109 import math
110
# Module level settings shared by the helpers and by ScriptRunner.
toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'
myversion = 'V001.1 March 2014'
# file name (no directory) of the script being run
progname = os.path.basename(sys.argv[0])
debug = False
verbose = False
116
117 # if we do html we need these dependencies specified in a tool_dependencies.xml file and referred to in the generated
118 # tool xml
119
def timenow():
    """return the current local time as a 'dd/mm/YYYY HH:MM:SS' string
    """
    stamp_format = '%d/%m/%Y %H:%M:%S'
    right_now = time.localtime(time.time())
    return time.strftime(stamp_format, right_now)
124
def quote_non_numeric(s):
    """return s unchanged when float() accepts it, otherwise s wrapped in double quotes
    intended for building name=value arguments for perl and Rscript
    """
    try:
        float(s)
    except ValueError:
        return '"%s"' % s
    return s
134
# Single character substitutions applied by html_escape.
# '&', '<' and '>' become entities; '$' is backslash escaped instead.
html_escape_table = dict([
    ("&", "&amp;"),
    (">", "&gt;"),
    ("<", "&lt;"),
    ("$", "\\$"),
])


def html_escape(text):
    """Return text with every character found in html_escape_table replaced by its escaped form."""
    pieces = []
    for char in text:
        pieces.append(html_escape_table.get(char, char))
    return "".join(pieces)
145
146
def html_unescape(text):
    """Revert entities within text.

    Turns &gt; &lt; and &amp; back into > < and & and removes the backslash
    from an escaped dollar sign.
    '&amp;' is decoded last: decoding it first would turn an escaped
    literal such as '&amp;gt;' into '&gt;' and then wrongly into '>'.
    """
    t = text.replace('&gt;', '>').replace('&lt;', '<').replace('\\$', '$')
    return t.replace('&amp;', '&')
151
def cmd_exists(cmd):
    """Return True if the shell builtin `type` can resolve cmd, else False.

    cmd is pasted into a shell command line unquoted, so only pass trusted,
    fixed command names.
    Output is discarded to os.devnull rather than to pipes, because pipes
    that nobody reads can block the child once they fill up.
    """
    with open(os.devnull, 'w') as devnull:
        return subprocess.call("type " + cmd, shell=True,
                               stdout=devnull, stderr=devnull) == 0
155
def parse_citations(citations_text):
    """Split citation text into a list of (type, content) tuples.

    citations_text holds entries separated by the marker "**ENTRY**".
    An entry starting with "doi" yields ("doi", rest); every other entry
    yields ("bibtex", ...), with a leading "bibtex" tag removed if present.
    Entries are stripped before the tag is examined so that whitespace or a
    newline after the marker does not hide the tag, and an untagged entry is
    kept whole instead of losing its first six characters.
    Blank entries are ignored; an empty input returns [].
    """
    citation_tuples = []
    for entry in citations_text.split("**ENTRY**"):
        entry = entry.strip()
        if not entry:
            continue
        if entry.startswith("doi"):
            citation_tuples.append(("doi", entry[len("doi"):].strip()))
        elif entry.startswith("bibtex"):
            citation_tuples.append(("bibtex", entry[len("bibtex"):].strip()))
        else:
            citation_tuples.append(("bibtex", entry))
    return citation_tuples
167
def shell_source(script):
    """Source a shell script and copy the resulting environment into os.environ.

    need a way to source a Galaxy tool interpreter env.sh to point at the right dependency package
    This based on the idea in http://pythonwise.blogspot.fr/2010/04/sourcing-shell-script.html
    The environment is dumped with `env -0` so entries are separated by nulls
    and values containing newlines survive.

    script is pasted into the shell command unquoted - pass trusted paths only.
    Each entry is split on its first '=' only, so values that themselves
    contain '=' are kept rather than silently dropped.
    """
    pipe = subprocess.Popen("env -i ; . %s ; env -0" % script, stdout=subprocess.PIPE, shell=True)
    output = pipe.communicate()[0]
    if not isinstance(output, str):
        # python 3 hands back bytes; python 2 str needs no decoding
        output = output.decode('utf-8', 'replace')
    newenv = {}
    for entry in output.split('\0'):
        name, sep, value = entry.partition('=')
        if sep and name:  # skips the empty tail after the final null
            newenv[name] = value
    os.environ.update(newenv)
178
179 class ScriptRunner:
180 """class is a wrapper for an arbitrary script
181 note funky templating. this should all be done proper.
182 Problem is, this kludge developed quite naturally and seems to work ok with
183 little overhead...
184
185 """
186
187
188 def __init__(self,opts=None):
189 """
190 cleanup inputs, setup some outputs
191
192 """
193
194 self.toolhtmldepinterpskel = """<?xml version="1.0"?>
195 <tool_dependency>
196 <package name="ghostscript" version="9.10">
197 <repository name="package_ghostscript_9_10" owner="devteam" prior_installation_required="True" />
198 </package>
199 <package name="graphicsmagick" version="1.3.18">
200 <repository name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" />
201 </package>
202 <package name="%(interpreter_name)s" version="%(interpreter_version)s">
203 <repository name="%(interpreter_pack)s" owner="%(interpreter_owner)s" prior_installation_required="True" />
204 </package>
205
206 <readme>
207 %(readme)s
208 This file was autogenerated by the Galaxy Tool Factory 2
209 </readme>
210 </tool_dependency>
211 """
212
213 self.toolhtmldepskel = """<?xml version="1.0"?>
214 <tool_dependency>
215 <package name="ghostscript" version="9.10">
216 <repository name="package_ghostscript_9_10" owner="devteam" prior_installation_required="True" />
217 </package>
218 <package name="graphicsmagick" version="1.3.18">
219 <repository name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" />
220 </package>
221 <readme>
222 %(readme)s
223 This file was autogenerated by the Galaxy Tool Factory 2
224 </readme>
225 </tool_dependency>
226 """
227
228 self.emptytoolhtmldepskel = """<?xml version="1.0"?>
229 <tool_dependency>
230 <readme>
231 %(readme)s
232 This file was autogenerated by the Galaxy Tool Factory 2
233 </readme>
234 </tool_dependency>
235 """
236
237 self.protorequirements = """<requirements>
238 <requirement type="package" version="9.10">ghostscript</requirement>
239 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
240 </requirements>"""
241
242 self.protorequirements_interpreter = """<requirements>
243 <requirement type="package" version="9.10">ghostscript</requirement>
244 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
245 <requirement type="package" version="%(interpreter_version)s">%(interpreter_name)s</requirement>
246 </requirements>"""
247
248
249 self.newCommand="""
250 %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s"
251 --tool_name "%(toolname)s"
252 %(command_inputs)s
253 %(command_outputs)s
254 """
255
256 self.tooltestsTabOnly = """
257 <test>
258 %(test1Inputs)s
259 <param name="job_name" value="test1"/>
260 <param name="runMe" value="$runMe"/>
261 <output name="output1="%(test1Output)s" ftype="tabular"/>
262 %(additionalParams)s
263 </test>
264 """
265
266 self.tooltestsHTMLOnly = """
267 <test>
268 %(test1Inputs)s
269 <param name="job_name" value="test1"/>
270 <param name="runMe" value="$runMe"/>
271 %(additionalParams)s
272 <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="5"/>
273 </test>
274 """
275
276 self.tooltestsBoth = """
277 <test>
278 %(test1Inputs)s
279 <param name="job_name" value="test1"/>
280 <param name="runMe" value="$runMe"/>
281 %(additionalParams)s
282 <output name="output1" file="%(test1Output)s" ftype="tabular" />
283 <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="10"/>
284 </test>
285 """
286
287 self.newXML="""<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s">
288 %(tooldesc)s
289 %(requirements)s
290 <command interpreter="python">
291 %(command)s
292 </command>
293 <inputs>
294 %(inputs)s
295 %(additionalInputs)s
296 </inputs>
297 <outputs>
298 %(outputs)s
299 </outputs>
300 <configfiles>
301 <configfile name="runMe">
302 %(script)s
303 </configfile>
304 </configfiles>
305 <tests>
306 %(tooltests)s
307 </tests>
308 <help>
309
310 %(help)s
311
312 This tool was autogenerated from a user provided script using the Galaxy Tool Factory 2
313 https://toolshed.g2.bx.psu.edu/view/fubar/tool_factory_2
314 </help>
315 <citations>
316 %(citations)s
317 <citation type="doi">10.1093/bioinformatics/bts573</citation>
318 </citations>
319 </tool>"""
320
321 self.useGM = cmd_exists('gm')
322 self.useIM = cmd_exists('convert')
323 self.useGS = cmd_exists('gs')
324 self.temp_warned = False # we want only one warning if $TMP not set
325 if opts.output_dir: # simplify for the tool tarball
326 os.chdir(opts.output_dir)
327 self.thumbformat = 'png'
328 self.opts = opts
329 self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but..
330 self.toolid = self.toolname
331 self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
332 self.pyfile = self.myname # crude but efficient - the cruft won't hurt much
333 self.xmlfile = '%s.xml' % self.toolname
334 rx = open(self.opts.script_path,'r').readlines()
335 rx = [x.rstrip() for x in rx] # remove pesky dos line endings if needed
336 self.script = '\n'.join(rx)
337 fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname,suffix=".%s" % (opts.interpreter))
338 tscript = open(self.sfile,'w') # use self.sfile as script source for Popen
339 tscript.write(self.script)
340 tscript.close()
341 self.indentedScript = " %s" % '\n'.join([' %s' % html_escape(x) for x in rx]) # for restructured text in help
342 self.escapedScript = "%s" % '\n'.join([' %s' % html_escape(x) for x in rx])
343 self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.toolname)
344 if opts.output_dir: # may not want these complexities
345 self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.toolname)
346 art = '%s.%s' % (self.toolname,opts.interpreter)
347 artpath = os.path.join(self.opts.output_dir,art) # need full path
348 artifact = open(artpath,'w') # use self.sfile as script source for Popen
349 artifact.write(self.script)
350 artifact.close()
351 self.cl = []
352 self.html = []
353 self.test1Inputs = [] # now a list
354 a = self.cl.append
355 a(opts.interpreter)
356 a(self.sfile)
357 # if multiple inputs - positional or need to distinguish them with cl params
358 if opts.input_tab:
359 tests = []
360 for i,intab in enumerate(opts.input_tab): # if multiple, make tests
361 if intab.find(',') <> -1:
362 (gpath,uname) = intab.split(',')
363 else:
364 gpath = uname = intab
365 tests.append(os.path.basename(gpath))
366 self.test1Inputs = '<param name="input_tab" value="%s" />' % (','.join(tests))
367 else:
368 self.test1Inputs = ''
369 # we always pass path,name pairs in using python optparse append
370 # but the command line has to be different
371 self.infile_paths = ''
372 self.infile_names = ''
373 if self.opts.input_tab:
374 self.infile_paths = ','.join([x.split(',')[0].strip() for x in self.opts.input_tab])
375 self.infile_names = ','.join([x.split(',')[1].strip() for x in self.opts.input_tab])
376 if self.opts.interpreter == 'python':
377 # yes, this is how additional parameters are always passed in python - to the TF itself and to
378 # scripts to avoid having unknown parameter names (yes, they can be parsed but...) on the command line
379 if self.opts.input_tab:
380 a('--inpaths=%s' % (self.infile_paths))
381 a('--innames=%s' % (self.infile_names))
382 if self.opts.output_tab:
383 a('--outpath=%s' % self.opts.output_tab)
384 for p in opts.additional_parameters:
385 p = p.replace('"','')
386 psplit = p.split(',')
387 param = html_unescape(psplit[0])
388 value = html_unescape(psplit[1])
389 a('%s="%s"' % (param,value))
390 if (self.opts.interpreter == 'Rscript'):
391 # pass params on command line as expressions which the script evaluates - see sample
392 if self.opts.input_tab:
393 a('INPATHS="%s"' % self.infile_paths)
394 a('INNAMES="%s"' % self.infile_names)
395 if self.opts.output_tab:
396 a('OUTPATH="%s"' % self.opts.output_tab)
397 for p in opts.additional_parameters:
398 p = p.replace('"','')
399 psplit = p.split(',')
400 param = html_unescape(psplit[0])
401 value = html_unescape(psplit[1])
402 a('%s=%s' % (param,quote_non_numeric(value)))
403 if (self.opts.interpreter == 'perl'):
404 # pass positional params on command line - perl script needs to discombobulate the path/name lists
405 if self.opts.input_tab:
406 a('%s' % self.infile_paths)
407 a('%s' % self.infile_names)
408 if self.opts.output_tab:
409 a('%s' % self.opts.output_tab)
410 for p in opts.additional_parameters:
411 # followed by any additional name=value parameter pairs
412 p = p.replace('"','')
413 psplit = p.split(',')
414 param = html_unescape(psplit[0])
415 value = html_unescape(psplit[1])
416 a('%s=%s' % (param,quote_non_numeric(value)))
417 if self.opts.interpreter == 'sh' or self.opts.interpreter == 'bash':
418 # more is better - now move all params into environment AND drop on to command line.
419 self.cl.insert(0,'env')
420 if self.opts.input_tab:
421 self.cl.insert(1,'INPATHS=%s' % (self.infile_paths))
422 self.cl.insert(2,'INNAMES=%s' % (self.infile_names))
423 if self.opts.output_tab:
424 self.cl.insert(3,'OUTPATH=%s' % (self.opts.output_tab))
425 a('OUTPATH=%s' % (self.opts.output_tab))
426 # sets those environment variables for the script
427 # additional params appear in CL - yes, it's confusing
428 for i,p in enumerate(opts.additional_parameters):
429 psplit = p.split(',')
430 param = html_unescape(psplit[0])
431 value = html_unescape(psplit[1])
432 a('%s=%s' % (param,quote_non_numeric(value)))
433 self.cl.insert(4+i,'%s=%s' % (param,quote_non_numeric(value)))
434 self.interpreter_owner = 'SYSTEM'
435 self.interpreter_pack = 'SYSTEM'
436 self.interpreter_name = 'SYSTEM'
437 self.interpreter_version = 'SYSTEM'
438 self.interpreter_revision = 'SYSTEM'
439 if opts.envshpath <> 'system': # need to parse out details for our tool_dependency
440 try: # fragile - depends on common naming convention as at jan 2015 = package_[interp]_v0_v1_v2... = version v0.v1.v2.. is in play
441 # this ONLY happens at tool generation by an admin - the generated tool always uses the default of system so path is from local env.sh
442 packdetails = opts.envshpath.split(os.path.sep)[-4:-1] # eg ['fubar', 'package_r_3_1_1', '63cdb9b2234c']
443 self.interpreter_owner = packdetails[0]
444 self.interpreter_pack = packdetails[1]
445 self.interpreter_name = packdetails[1].split('_')[1].upper()
446 self.interpreter_revision = packdetails[2]
447 self.interpreter_version = '.'.join(packdetails[1].split('_')[2:])
448 except:
449 pass
450 self.outFormats = opts.output_format
451 self.inputFormats = opts.input_formats
452 self.test1Output = '%s_test1_output.xls' % self.toolname
453 self.test1HTML = '%s_test1_output.html' % self.toolname
454
def makeXML(self):
    """
    Create a Galaxy xml tool wrapper for the new script and write it to self.xmlfile
    fixme - use templating or something less fugly than this

    The wrapper is produced by filling the self.newXML template from a dict
    (xdict) whose keys match the template placeholders: description,
    requirements, command, inputs, outputs, the escaped script as a
    configfile, tests, help and citations.

    Reads self.opts (additional_parameters, edit_additional_parameters,
    include_dependencies, envshpath, tool_version, make_HTML, output_tab,
    help_text, user_email, tool_desc, input_tab, input_formats, interpreter,
    citations) plus the names and templates prepared by __init__.
    Returns nothing; the only output is the xml file.
    """

    # these templates need a dict with the right keys to match the parameters - outputs, help, code...
    opts = self.opts
    additional = opts.additional_parameters or []
    xdict = {}
    xdict['additionalParams'] = ''
    xdict['additionalInputs'] = ''
    if additional:
        fields = [x.split(',') for x in additional] # name,value,label,help,type
        if opts.edit_additional_parameters: # add to new tool form with default value set to original value
            xdict['additionalInputs'] = '\n'.join(['<param name="%s" value="%s" label="%s" help="%s" type="%s"/>' %
                (f[0],html_escape(f[1]),html_escape(f[2]),html_escape(f[3]),f[4]) for f in fields])
        xdict['additionalParams'] = '\n'.join(['<param name="%s" value="%s" />' % (f[0],html_escape(f[1])) for f in fields])
    xdict['interpreter_owner'] = self.interpreter_owner
    xdict['interpreter_version'] = self.interpreter_version
    xdict['interpreter_pack'] = self.interpreter_pack
    xdict['interpreter_name'] = self.interpreter_name
    xdict['requirements'] = ''
    if opts.include_dependencies == "yes":
        if opts.envshpath != 'system':
            xdict['requirements'] = self.protorequirements_interpreter % xdict
        else:
            xdict['requirements'] = self.protorequirements
    xdict['tool_version'] = opts.tool_version
    xdict['test1HTML'] = self.test1HTML
    xdict['test1Output'] = self.test1Output
    xdict['test1Inputs'] = self.test1Inputs
    if opts.make_HTML and opts.output_tab:
        xdict['tooltests'] = self.tooltestsBoth % xdict
    elif opts.make_HTML:
        xdict['tooltests'] = self.tooltestsHTMLOnly % xdict
    else:
        xdict['tooltests'] = self.tooltestsTabOnly % xdict
    xdict['script'] = self.escapedScript
    # configfile is least painful way to embed script to avoid external dependencies
    # but requires escaping of <, > and $ to avoid Mako parsing
    if opts.help_text:
        with open(opts.help_text,'r') as helpf:
            # must html escape here too - thanks to Marius van den Beek
            xdict['help'] = ''.join([html_escape(x) for x in helpf.readlines()])
    else:
        xdict['help'] = 'Please ask the tool author (%s) for help as none was supplied at tool generation\n' % (opts.user_email)
    coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::']
    coda.append('\n')
    coda.append(self.indentedScript)
    coda.append('\n**Attribution**\nThis Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.\n' % (opts.user_email,timenow()))
    coda.append('See %s for details of that project' % (toolFactoryURL))
    coda.append('Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team. ')
    coda.append('Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573\n')
    xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda))
    if opts.tool_desc:
        xdict['tooldesc'] = '<description>%s</description>' % opts.tool_desc
    else:
        xdict['tooldesc'] = ''
    xdict['command_outputs'] = ''
    xdict['outputs'] = ''
    if opts.input_tab:
        cins = ['\n',]
        cins.append('--input_formats %s' % opts.input_formats)
        # loop variable must be the name of the param declared below (input_tab);
        # this used to say $input1, which no generated param defines
        cins.append('#for intab in $input_tab:')
        cins.append('--input_tab "${intab},${intab.name}"')
        cins.append('#end for\n')
        xdict['command_inputs'] = '\n'.join(cins)
        xdict['inputs'] = '''<param name="input_tab" multiple="true" type="data" format="%s" label="Select one or more %s input files from your history"
help="Multiple inputs may be selected assuming the script can deal with them..."/> \n''' % (self.inputFormats,self.inputFormats)
    else:
        xdict['command_inputs'] = '' # assume no input - eg a random data generator
        xdict['inputs'] = ''
    if additional:
        cins = ['\n',]
        for params in additional:
            psplit = params.split(',') # name,value...
            psplit[3] = html_escape(psplit[3])
            if opts.edit_additional_parameters:
                psplit[1] = '$%s' % psplit[0] # replace with form value
            else:
                psplit[1] = html_escape(psplit[1]) # leave prespecified value
            cins.append('--additional_parameters """%s"""' % ','.join(psplit))
        xdict['command_inputs'] = '%s\n%s' % (xdict['command_inputs'],'\n'.join(cins))
    xdict['inputs'] += '<param name="job_name" type="text" size="60" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname
    xdict['toolname'] = self.toolname
    xdict['toolid'] = self.toolid
    xdict['interpreter'] = opts.interpreter
    xdict['scriptname'] = self.sfile
    if opts.make_HTML:
        xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"'
        xdict['outputs'] += ' <data format="html" name="html_file" label="${job_name}.html"/>\n'
    else:
        xdict['command_outputs'] += ' --output_dir "./"'
    if opts.output_tab:
        xdict['command_outputs'] += ' --output_tab "$output1"'
        xdict['outputs'] += ' <data format="%s" name="output1" label="${job_name}"/>\n' % self.outFormats
    xdict['command'] = self.newCommand % xdict
    if opts.citations:
        with open(opts.citations,'r') as citef:
            citation_tuples = parse_citations(citef.read())
        xdict['citations'] = ''.join(["""<citation type="%s">%s</citation>""" % (citation_type, html_escape(citation_content))
                                      for citation_type, citation_content in citation_tuples])
    else:
        xdict['citations'] = ""
    xmls = self.newXML % xdict
    with open(self.xmlfile,'w') as xf:
        xf.write(xmls)
        xf.write('\n')
    # ready for the tarball
611
612
def makeTooltar(self):
    """
    a tool is a gz tarball with eg
    /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...

    Runs the script first (self.run()); on a non zero result a message is
    written to stderr and the process exits with status 1.
    Otherwise a directory named after the tool is filled with the generated
    xml, tool_dependencies.xml, an annotated copy of this program as
    <toolname>.py and a test-data directory holding the inputs and outputs
    of this run. The directory is tarred to <toolname>.tar.gz, copied to
    opts.new_tool and removed again.

    Returns the value self.run() returned.
    """
    retval = self.run()
    if retval:
        sys.stderr.write('## Run failed. Cannot build yet. Please fix and retry\n')
        sys.exit(1)
    tdir = self.toolname
    os.mkdir(tdir)
    self.makeXML()
    if self.opts.help_text:
        with open(self.opts.help_text,'r') as helpf:
            hlp = helpf.read()
    else:
        hlp = 'Please ask the tool author for help as none was supplied at tool generation\n'
    readme_dict = {'readme':hlp,'interpreter':self.opts.interpreter,'interpreter_version':self.interpreter_version,'interpreter_name':self.interpreter_name,
        'interpreter_owner':self.interpreter_owner,'interpreter_pack':self.interpreter_pack}
    if self.opts.include_dependencies == "yes":
        if self.opts.envshpath == 'system':
            tooldepcontent = self.toolhtmldepskel % readme_dict
        else:
            tooldepcontent = self.toolhtmldepinterpskel % readme_dict
    else:
        tooldepcontent = self.emptytoolhtmldepskel % readme_dict
    with open(os.path.join(tdir,'tool_dependencies.xml'),'w') as depf:
        depf.write(tooldepcontent)
        depf.write('\n')
    testdir = os.path.join(tdir,'test-data')
    os.mkdir(testdir) # make tests directory
    # inputs are optional (a tool may only generate data) so tolerate none
    for intab in (self.opts.input_tab or []):
        si = intab.split(',')[0] # drop the ,name part if there is one
        dest = os.path.join(testdir,os.path.basename(si))
        if si != dest:
            shutil.copyfile(si,dest)
    if self.opts.output_tab:
        shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output))
    if self.opts.make_HTML:
        shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML))
    if self.opts.output_dir:
        shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log'))
    outpif = '%s.py' % self.toolname # new name
    outpiname = os.path.join(tdir,outpif) # path for the tool tarball
    pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM)
    notes = ['# %s - a self annotated version of %s generated by running %s\n' % (outpiname,pyin,pyin),]
    notes.append('# to make a new Galaxy tool called %s\n' % self.toolname)
    notes.append('# User %s at %s\n' % (self.opts.user_email,timenow()))
    with open(self.pyfile,'r') as pi: # our code becomes new tool wrapper (!) - first Galaxy worm
        notes += pi.readlines()
    with open(outpiname,'w') as outpi:
        outpi.write(''.join(notes))
        outpi.write('\n')
    # NOTE(review): if self.sfile is an absolute path, os.path.join returns it
    # unchanged, it already exists and no copy is made - confirm that is intended
    stname = os.path.join(tdir,self.sfile)
    if not os.path.exists(stname):
        shutil.copyfile(self.sfile, stname)
    xtname = os.path.join(tdir,self.xmlfile)
    if not os.path.exists(xtname):
        shutil.copyfile(self.xmlfile,xtname)
    tarpath = "%s.tar.gz" % self.toolname
    tar = tarfile.open(tarpath, "w:gz")
    try:
        tar.add(tdir,arcname='%s' % self.toolname)
    finally:
        tar.close()
    shutil.copyfile(tarpath,self.opts.new_tool)
    shutil.rmtree(tdir)
    ## TODO: replace with optional direct upload to local toolshed?
    return retval
684
685
686 def compressPDF(self,inpdf=None,thumbformat='png'):
687 """need absolute path to pdf
688 note that GS gets confoozled if no $TMP or $TEMP
689 so we set it
690 """
691 assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName)
692 hlog = os.path.join(self.opts.output_dir,"compress_%s.txt" % os.path.basename(inpdf))
693 sto = open(hlog,'a')
694 our_env = os.environ.copy()
695 our_tmp = our_env.get('TMP',None)
696 if not our_tmp:
697 our_tmp = our_env.get('TEMP',None)
698 if not (our_tmp and os.path.exists(our_tmp)):
699 newtmp = os.path.join(self.opts.output_dir,'tmp')
700 try:
701 os.mkdir(newtmp)
702 except:
703 sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
704 our_env['TEMP'] = newtmp
705 if not self.temp_warned:
706 sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
707 self.temp_warned = True
708 outpdf = '%s_compressed' % inpdf
709 cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH","-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf,inpdf]
710 x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
711 retval1 = x.wait()
712 sto.close()
713 if retval1 == 0:
714 os.unlink(inpdf)
715 shutil.move(outpdf,inpdf)
716 os.unlink(hlog)
717 hlog = os.path.join(self.opts.output_dir,"thumbnail_%s.txt" % os.path.basename(inpdf))
718 sto = open(hlog,'w')
719 outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
720 if self.useGM:
721 cl2 = ['gm', 'convert', inpdf, outpng]
722 else: # assume imagemagick
723 cl2 = ['convert', inpdf, outpng]
724 x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
725 retval2 = x.wait()
726 sto.close()
727 if retval2 == 0:
728 os.unlink(hlog)
729 retval = retval1 or retval2
730 return retval
731
732
def getfSize(self,fpath,outpath):
    """
    return a readable size string (MB, KB or B) for the file fpath inside outpath
    an empty string comes back when that path is not a regular file
    """
    target = os.path.join(outpath,fpath)
    if not os.path.isfile(target):
        return ''
    nbytes = float(os.path.getsize(target))
    if nbytes > 2**20:
        return '%1.1f MB' % (nbytes/2**20)
    if nbytes > 2**10:
        return '%1.1f KB' % (nbytes/2**10)
    if nbytes > 0:
        return '%d B' % (int(nbytes))
    return '0 B'
749
def makeHtml(self):
    """ Create an HTML file content to list all the artifacts found in the output_dir

    Every file in self.opts.output_dir (except 'Rplots.pdf') gets a row in a
    download table. Each PDF is passed to self.compressPDF to make a thumbnail;
    if that returns non-zero the PDF itself is used as the image source.

    Each *.log file found defines a section named after the part of the file
    name before the first '_'; PDFs whose names start with that same prefix are
    shown as a thumbnail grid in that section. The tool's own log (self.tlog) is
    always handled last and collects whatever PDFs remain.

    The page is written to self.opts.output_html and the list of HTML fragments
    is kept on self.html.
    """
    galhtmlprefix = (
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n'
        '<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n'
        '<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />\n'
        '<title></title>\n'
        '<link rel="stylesheet" href="/static/style/base.css" type="text/css" />\n'
        '</head>\n'
        '<body>\n'
        '<div class="toolFormBody">\n'
    )
    galhtmlattr = """<hr/><div class="infomessage">This tool (%s) was generated by the <a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory</a></div><br/>"""  # kept from the original; not used in this method
    galhtmlpostfix = """</div></body></html>\n"""

    outdir = self.opts.output_dir
    tlogpath = os.path.abspath(self.tlog)
    flist = sorted(x for x in os.listdir(outdir) if x != 'Rplots.pdf')
    html = [galhtmlprefix % progname]
    html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.toolname, timenow()))
    fhtml = []
    if flist:
        # log file names determine sections. The names come from listdir(outdir)
        # so they must be resolved against outdir, not the current working
        # directory, both for comparing with the tool log and for opening them.
        logfiles = [os.path.abspath(os.path.join(outdir, x)) for x in flist if x.lower().endswith('.log')]
        logfiles = [x for x in logfiles if x != tlogpath]
        logfiles.append(tlogpath)  # make it the last one
        pdflist = []
        for rownum, fname in enumerate(flist):
            dname, e = os.path.splitext(fname)
            sfsize = self.getfSize(fname, outdir)
            if e.lower() == '.pdf':  # compress and make a thumbnail
                thumb = '%s.%s' % (dname, self.thumbformat)
                pdff = os.path.join(outdir, fname)
                retval = self.compressPDF(inpdf=pdff, thumbformat=self.thumbformat)
                if retval == 0:
                    pdflist.append((fname, thumb))
                else:
                    pdflist.append((fname, fname))  # no thumbnail - fall back to the pdf itself
            if (rownum + 1) % 2 == 0:
                fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
            else:
                fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
        for logfname in logfiles:  # expect at least tlog - if more
            if logfname == tlogpath:  # always last - takes all remaining pdfs
                sectionname = 'All tool run'
                if len(logfiles) > 1:
                    sectionname = 'Other'
                ourpdfs = pdflist
            else:
                realname = os.path.basename(logfname)
                sectionname = os.path.splitext(realname)[0].split('_')[0]  # break in case _ added to log
                ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
                pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] != sectionname]  # remove
            npdf = len(ourpdfs)
            if npdf > 0:
                # smallest square grid that holds all the thumbnails
                nacross = math.sqrt(npdf)
                if int(nacross)**2 != npdf:
                    nacross += 1
                nacross = int(nacross)
                width = min(400, 1200 // nacross)
                html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname)
                html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>')
                ntogo = nacross  # counter for table row padding with empty cells
                html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
                for i, (fname, thumb) in enumerate(ourpdfs):
                    s = ('<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"\n'
                         'alt="Image called %s"/></a></td>\n') % (fname, thumb, fname, width, fname)
                    if (i + 1) % nacross == 0:  # end of a table row
                        s += '</tr>\n'
                        ntogo = 0
                        if i < (npdf - 1):  # more to come
                            s += '<tr>'
                            ntogo = nacross
                    else:
                        ntogo -= 1
                    html.append(s)
                if html[-1].strip().endswith('</tr>'):
                    html.append('</table></div>\n')
                else:
                    if ntogo > 0:  # pad
                        html.append('<td>&nbsp;</td>' * ntogo)
                    html.append('</tr></table></div>\n')
            with open(logfname, 'r') as logf:
                logtext = [x for x in logf if x.strip()]  # drop blank lines
            html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
            if len(logtext) > 1:
                html.append('\n<pre>\n')
                html += logtext
                html.append('\n</pre>\n')
            else:
                # report the file name only, not the full path on the server
                html.append('%s is empty<br/>' % os.path.basename(logfname))
        if fhtml:
            fhtml.insert(0, '<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
            fhtml.append('</table></div><br/>')
            html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
            html += fhtml  # add the table of all files to the end of the display
    else:
        html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
    html.append(galhtmlpostfix)
    with open(self.opts.output_html, 'w') as htmlf:
        htmlf.write('\n'.join(html))
        htmlf.write('\n')
    self.html = html
861
862
863
def run(self):
    """
    Run the command line in self.cl and return its exit code.

    If self.opts.envshpath is not 'system' that env.sh is sourced first via
    shell_source. This only happens at tool generation - the generated tool
    relies on the dependencies all being set up at toolshed installation by
    sourcing local env.sh.

    When self.opts.output_dir is set, the command runs with that directory as
    cwd, stdout goes to self.tlog (after a header line recording the command)
    and stderr goes to self.elog. If the exit code is non-zero, whatever was
    captured in self.elog is echoed to our own stderr. If self.opts.make_HTML
    is set, self.makeHtml() is then called.

    Without an output_dir the command simply inherits our stdout and stderr.
    """
    if self.opts.envshpath != 'system':
        shell_source(self.opts.envshpath)
    err = ''
    if self.opts.output_dir:
        # context managers so both logs are closed even if Popen raises
        with open(self.elog, 'wb') as ste:
            with open(self.tlog, 'w') as sto:
                s = ' '.join(self.cl)
                sto.write('## Executing Toolfactory generated command line = %s\n' % s)
                sto.flush()  # header must reach the file before the child writes to the same descriptor
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=ste, cwd=self.opts.output_dir)
                retval = p.wait()
        buffsize = 1048576
        chunks = []
        with open(self.elog, 'rb') as tmp_stderr:
            try:
                while True:
                    chunk = tmp_stderr.read(buffsize)
                    # stop on end of file; testing the total length instead
                    # never terminates when the log size is an exact multiple
                    # of buffsize
                    if not chunk:
                        break
                    chunks.append(chunk)
            except OverflowError:
                pass
        err = b''.join(chunks)
        if not isinstance(err, str):  # bytes on python 3
            err = err.decode('utf-8', 'replace')
    else:
        p = subprocess.Popen(self.cl, shell=False)
        retval = p.wait()
    if self.opts.output_dir:
        if retval != 0 and err:  # problem
            sys.stderr.write('%s\n' % err)
        if self.opts.make_HTML:
            self.makeHtml()
    return retval
903
904
905
def main():
    """
    Command line entry point for the Tool Factory wrapper.

    Parses the options passed by the Galaxy tool XML, checks the required
    ones, makes sure the output directory exists, strips quotes from the
    input and additional parameter strings, then hands the options to
    ScriptRunner to either build a new tool (--make_Tool) or run the script.

    The temporary script file (r.sfile) is removed afterwards and a non-zero
    return code is passed to sys.exit to indicate failure to the job runner.
    Invalid options end the process via sys.exit with the explanatory message.
    """
    u = """
This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
<command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
</command>
"""
    op = optparse.OptionParser(usage=u)
    a = op.add_option
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default='./')
    a('--output_html', default=None)
    a('--input_tab', default=[], action="append")  # these are "galaxypath,metadataname" pairs
    a("--input_formats", default="tabular")
    a('--output_tab', default=None)
    a('--output_format', default='tabular')
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_Tool', default=None)
    a('--make_HTML', default=None)
    a('--help_text', default=None)
    a('--tool_desc', default=None)
    a('--new_tool', default=None)
    a('--tool_version', default=None)
    a('--include_dependencies', default=None)
    a('--citations', default=None)
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    a('--edit_additional_parameters', action="store_true", default=False)
    a('--envshpath', default="system")
    opts, args = op.parse_args()
    # explicit checks rather than assert, which is stripped when python runs with -O
    if opts.bad_user:
        sys.exit('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user))
    if not opts.tool_name:
        sys.exit('## Tool Factory expects a tool name - eg --tool_name=DESeq')
    if not opts.interpreter:
        sys.exit('## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
    # script_path defaults to None, which os.path.isfile cannot be relied on to accept
    if not (opts.script_path and os.path.isfile(opts.script_path)):
        sys.exit('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # an already existing directory is fine; anything else is a real problem
            if not os.path.isdir(opts.output_dir):
                raise
    opts.input_tab = [x.replace('"', '').replace("'", '') for x in opts.input_tab]
    # remove quotes we need to deal with spaces in CL params
    opts.additional_parameters = [x.replace('"', '') for x in opts.additional_parameters]
    r = ScriptRunner(opts)
    try:
        if opts.make_Tool:
            retcode = r.makeTooltar()
        else:
            retcode = r.run()
    finally:
        os.unlink(r.sfile)  # always clean up the temporary script
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner
957
958
# Only start the wrapper when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
961
962