comparison rgToolFactory2.py @ 13:00777b83aaca draft

Uploaded
author fubar
date Thu, 15 Jan 2015 07:43:13 -0500
parents
children 3635f4518c4d
comparison
equal deleted inserted replaced
12:bd8acc5a7590 13:00777b83aaca
1 # rgToolFactoryMultIn.py
2 # see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
3 #
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
5 #
6 # all rights reserved
7 # Licensed under the LGPL
8 # suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
9 #
10 # January 2015
11 # in the process of building a complex tool
12 # added ability to choose one of the current toolshed package_r or package_perl or package_python dependencies and source that package
13 # need to add that package to tool_dependencies
14 #
15 # sept 2014 added additional params from
16 # https://bitbucket.org/mvdbeek/dockertoolfactory/src/d4863bcf7b521532c7e8c61b6333840ba5393f73/DockerToolFactory.py?at=default
17 # passing them is complex
18 # and they are restricted to NOT contain commas or double quotes to ensure that they can be safely passed together on
19 # the toolfactory command line as a comma delimited double quoted string for parsing and passing to the script
20 # see examples on this tool form
21
22 # august 2014
23
24 # Allows arbitrary number of input files
25 # NOTE positional parameters are now passed to script
26 # and output (may be "None") is *before* arbitrary number of inputs
27 #
28 # march 2014
29 # had to remove dependencies because cross toolshed dependencies are not possible - can't pre-specify a toolshed url for graphicsmagick and ghostscript
30 # grrrrr - night before a demo
31 # added dependencies to a tool_dependencies.xml if html page generated so generated tool is properly portable
32 #
33 # added ghostscript and graphicsmagick as dependencies
34 # fixed a weird problem where gs was trying to use the new_files_path from universe (database/tmp) as ./database/tmp
35 # errors ensued
36 #
37 # august 2013
38 # found a problem with GS if $TMP or $TEMP missing - now inject /tmp and warn
39 #
40 # july 2013
41 # added ability to combine images and individual log files into html output
42 # just make sure there's a log file foo.log and it will be output
43 # together with all images named like "foo_*.pdf"
44 # otherwise old format for html
45 #
46 # January 2013
47 # problem pointed out by Carlos Borroto
48 # added escaping for <>$ - thought I did that ages ago...
49 #
50 # August 11 2012
51 # changed to use shell=False and cl as a sequence
52
53 # This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
54 # It also serves as the wrapper for the new tool.
55 #
56 # you paste and run your script
57 # Only works for simple scripts that read one input from the history.
58 # Optionally can write one new history dataset,
59 # and optionally collect any number of outputs into links on an autogenerated HTML page.
60
61 # DO NOT install on a public or important site - please.
62
63 # installed generated tools are fine if the script is safe.
64 # They just run normally and their user cannot do anything unusually insecure
65 # but please, practice safe toolshed.
66 # Read the fucking code before you install any tool
67 # especially this one
68
69 # After you get the script working on some test data, you can
70 # optionally generate a toolshed compatible gzip file
71 # containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
72 # safe and largely automated installation in a production Galaxy.
73
74 # If you opt for an HTML output, you get all the script outputs arranged
75 # as a single Html history item - all output files are linked, thumbnails for all the pdfs.
76 # Ugly but really inexpensive.
77 #
78 # Patches appreciated please.
79 #
80 #
81 # long route to June 2012 product
82 # Behold the awesome power of Galaxy and the toolshed with the tool factory to bind them
83 # derived from an integrated script model
84 # called rgBaseScriptWrapper.py
85 # Note to the unwary:
86 # This tool allows arbitrary scripting on your Galaxy as the Galaxy user
87 # There is nothing stopping a malicious user doing whatever they choose
88 # Extremely dangerous!!
89 # Totally insecure. So, trusted users only
90 #
91 # preferred model is a developer using their throw away workstation instance - ie a private site.
92 # no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
93 #
94
95 import sys
96 import shutil
97 import subprocess
98 import os
99 import time
100 import tempfile
101 import optparse
102 import tarfile
103 import re
104 import shutil
105 import math
106
# Module-wide settings.
# progname is the file name this script was started as (no directory part);
# it is written into the generator meta tag of the HTML report.
progname = os.path.basename(sys.argv[0])
myversion = 'V001.1 March 2014'
# diagnostic switches, both off by default
verbose = False
debug = False
# project home page quoted in the help text of every generated tool
toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'
112
113 # if we do html we need these dependencies specified in a tool_dependencies.xml file and referred to in the generated
114 # tool xml
115
def timenow():
    """Return the current local time as a 'dd/mm/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    local_now = time.localtime(time.time())
    return time.strftime(stamp_format, local_now)
120
# Single characters that are rewritten before text is embedded in generated
# tool files, mapped to their replacement. Note that '$' becomes a
# backslash-escaped dollar rather than an entity.
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",
}


def html_escape(text):
    """Return text with every character listed in html_escape_table replaced.

    Characters not present in the table are copied through unchanged.
    """
    pieces = []
    for char in text:
        pieces.append(html_escape_table.get(char, char))
    return "".join(pieces)
131
132
def html_unescape(text):
    """Revert the substitutions made by html_escape.

    '&gt;', '&lt;' and the backslash-escaped dollar are restored first and
    '&amp;' is restored last. Expanding '&amp;' first would turn escaped text
    such as '&amp;gt;' (the escaped form of a literal '&gt;') into '&gt;' and
    then into '>', so escaping followed by unescaping would not return the
    original text.
    """
    restored = text.replace('&gt;', '>').replace('&lt;', '<').replace('\\$', '$')
    # must come last - see docstring
    return restored.replace('&amp;', '&')
137
def cmd_exists(cmd):
    """Return True when the shell's ``type`` builtin can resolve cmd.

    The output of ``type`` is discarded by sending it to os.devnull. Pipes are
    deliberately not used here: subprocess.call never reads from them, which
    the subprocess documentation warns can block the child.

    NOTE(review): cmd is interpolated into a shell command line unquoted, so
    only pass trusted, literal command names.
    """
    devnull = open(os.devnull, 'w')
    try:
        status = subprocess.call("type " + cmd, shell=True,
                                 stdout=devnull, stderr=devnull)
    finally:
        devnull.close()
    return status == 0
141
def parse_citations(citations_text):
    """Split a citations blob into (type, content) tuples.

    Entries are separated by the literal marker ``**ENTRY**``. After
    surrounding whitespace is removed, an entry starting with ``doi`` yields
    ("doi", rest) and anything else yields ("bibtex", rest), where rest is the
    stripped text following the ``doi``/``bibtex`` marker. Blank entries are
    ignored.

    The marker test is made on the stripped entry so leading whitespace or a
    newline after ``**ENTRY**`` does not misclassify a doi, and the ``bibtex``
    marker is only removed when it is actually present so unmarked entries do
    not lose their first six characters.
    """
    citation_tuples = []
    for raw_entry in citations_text.split("**ENTRY**"):
        entry = raw_entry.strip()
        if not entry:
            continue
        if entry.startswith("doi"):
            citation_tuples.append(("doi", entry[len("doi"):].strip()))
        elif entry.startswith("bibtex"):
            citation_tuples.append(("bibtex", entry[len("bibtex"):].strip()))
        else:
            # no marker at all - keep the whole entry and treat it as bibtex
            citation_tuples.append(("bibtex", entry))
    return citation_tuples
153
def shell_source(script):
    """Emulate the shell's ``source`` for a Galaxy tool dependency env.sh.

    The script is sourced in a child shell, the resulting environment is
    printed with ``env -0`` (NUL separated, so values containing newlines
    survive) and every NAME=value pair found is copied into os.environ.
    See http://pythonwise.blogspot.fr/2010/04/sourcing-shell-script.html

    Each entry is split at the first '=' only, so variables whose value itself
    contains '=' are kept instead of being silently dropped.

    NOTE(review): script is interpolated into the shell command unquoted;
    a path containing spaces or shell metacharacters will not work.
    """
    pipe = subprocess.Popen("env -i ; . %s ; env -0" % script, stdout=subprocess.PIPE, shell=True)
    output = pipe.communicate()[0]
    if not isinstance(output, str):
        # python 3 hands back bytes; python 2 str is already what we need
        output = output.decode('utf-8', 'replace')
    newenv = {}
    for entry in output.split('\0'):
        name, sep, value = entry.partition('=')
        if sep and name:  # skips the empty trailing chunk and anything without a name
            newenv[name] = value
    os.environ.update(newenv)
167
168 class ScriptRunner:
169 """class is a wrapper for an arbitrary script
170 note funky templating. this should all be done proper.
171 Problem is, this kludge developed quite naturally and seems to work ok with
172 little overhead...
173
174 """
175
176
177 def __init__(self,opts=None,treatbashSpecial=True):
178 """
179 cleanup inputs, setup some outputs
180
181 """
182
183 self.toolhtmldepinterpskel = """<?xml version="1.0"?>
184 <tool_dependency>
185 <package name="ghostscript" version="9.10">
186 <repository name="package_ghostscript_9_10" owner="devteam" prior_installation_required="True" />
187 </package>
188 <package name="graphicsmagick" version="1.3.18">
189 <repository name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" />
190 </package>
191 <package name="%(interpreter_name)s" version="%(interpreter_version)s">
192 <repository name="%(interpreter_pack)s" owner="%(interpreter_owner)s" prior_installation_required="True" />
193 </package>
194
195 <readme>
196 %(readme)s
197 </readme>
198 </tool_dependency>
199 """
200
201 self.toolhtmldepskel = """<?xml version="1.0"?>
202 <tool_dependency>
203 <package name="ghostscript" version="9.10">
204 <repository name="package_ghostscript_9_10" owner="devteam" prior_installation_required="True" />
205 </package>
206 <package name="graphicsmagick" version="1.3.18">
207 <repository name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" />
208 </package>
209 <readme>
210 %(readme)s
211 </readme>
212 </tool_dependency>
213 """
214
215 self.emptytoolhtmldepskel = """<?xml version="1.0"?>
216 <tool_dependency>
217 <readme>
218 %(readme)s
219 </readme>
220 </tool_dependency>
221 """
222
223 self.protorequirements = """<requirements>
224 <requirement type="package" version="9.10">ghostscript</requirement>
225 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
226 </requirements>"""
227
228 self.protorequirements_interpreter = """<requirements>
229 <requirement type="package" version="9.10">ghostscript</requirement>
230 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
231 <requirement type="package" version="%(interpreter_version)s">%(interpreter_name)s</requirement>
232 </requirements>"""
233
234
235 self.newCommand="""
236 %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s"
237 --tool_name "%(toolname)s"
238 %(command_inputs)s
239 %(command_outputs)s
240 """
241
242 self.tooltestsTabOnly = """
243 <test>
244 %(test1Inputs)s
245 <param name="job_name" value="test1"/>
246 <param name="runMe" value="$runMe"/>
247 <output name="output1="%(test1Output)s" ftype="tabular"/>
248 %(additionalParams)s
249 </test>
250 """
251
252 self.tooltestsHTMLOnly = """
253 <test>
254 %(test1Inputs)s
255 <param name="job_name" value="test1"/>
256 <param name="runMe" value="$runMe"/>
257 %(additionalParams)s
258 <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="5"/>
259 </test>
260 """
261
262 self.tooltestsBoth = """
263 <test>
264 %(test1Inputs)s
265 <param name="job_name" value="test1"/>
266 <param name="runMe" value="$runMe"/>
267 %(additionalParams)s
268 <output name="output1" file="%(test1Output)s" ftype="tabular" />
269 <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="10"/>
270 </test>
271 """
272
273 self.newXML="""<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s">
274 %(tooldesc)s
275 %(requirements)s
276 <command interpreter="python">
277 %(command)s
278 </command>
279 <inputs>
280 %(inputs)s
281 %(additionalInputs)s
282 </inputs>
283 <outputs>
284 %(outputs)s
285 </outputs>
286 <configfiles>
287 <configfile name="runMe">
288 %(script)s
289 </configfile>
290 </configfiles>
291 <tests>
292 %(tooltests)s
293 </tests>
294 <help>
295
296 %(help)s
297
298 </help>
299 <citations>
300 %(citations)s
301 <citation type="doi">10.1093/bioinformatics/bts573</citation>
302 </citations>
303 </tool>"""
304
305 self.useGM = cmd_exists('gm')
306 self.useIM = cmd_exists('convert')
307 self.useGS = cmd_exists('gs')
308 self.temp_warned = False # we want only one warning if $TMP not set
309 self.treatbashSpecial = treatbashSpecial
310 if opts.output_dir: # simplify for the tool tarball
311 os.chdir(opts.output_dir)
312 self.thumbformat = 'png'
313 self.opts = opts
314 self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but..
315 self.toolid = self.toolname
316 self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
317 self.pyfile = self.myname # crude but efficient - the cruft won't hurt much
318 self.xmlfile = '%s.xml' % self.toolname
319 rx = open(self.opts.script_path,'r').readlines()
320 rx = [x.rstrip() for x in rx] # remove pesky dos line endings if needed
321 self.script = '\n'.join(rx)
322 fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname,suffix=".%s" % (opts.interpreter))
323 tscript = open(self.sfile,'w') # use self.sfile as script source for Popen
324 tscript.write(self.script)
325 tscript.close()
326 self.indentedScript = " %s" % '\n'.join([' %s' % html_escape(x) for x in rx]) # for restructured text in help
327 self.escapedScript = "%s" % '\n'.join([' %s' % html_escape(x) for x in rx])
328 self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.toolname)
329 if opts.output_dir: # may not want these complexities
330 self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.toolname)
331 art = '%s.%s' % (self.toolname,opts.interpreter)
332 artpath = os.path.join(self.opts.output_dir,art) # need full path
333 artifact = open(artpath,'w') # use self.sfile as script source for Popen
334 artifact.write(self.script)
335 artifact.close()
336 self.cl = []
337 self.html = []
338 self.test1Inputs = [] # now a list
339 a = self.cl.append
340 a(opts.interpreter)
341 a(self.sfile)
342 # if multiple inputs - positional or need to distinguish them with cl params
343 if opts.input_tab:
344 tests = []
345 for i,intab in enumerate(opts.input_tab): # if multiple, make tests
346 if intab.find(',') <> -1:
347 (gpath,uname) = intab.split(',')
348 else:
349 gpath = uname = intab
350 tests.append(os.path.basename(gpath))
351 self.test1Inputs = '<param name="input_tab" value="%s" />' % (','.join(tests))
352 else:
353 self.test1Inputs = ''
354 # we always pass path,name pairs in using python optparse append
355 # but the command line has to be different
356 self.infile_paths = ''
357 self.infile_names = ''
358 if self.opts.input_tab:
359 self.infile_paths = ','.join([x.split(',')[0].strip() for x in self.opts.input_tab])
360 self.infile_names = ','.join([x.split(',')[1].strip() for x in self.opts.input_tab])
361 if self.opts.interpreter == 'python':
362 # yes, this is how additional parameters are always passed in python - to the TF itself and to
363 # scripts to avoid having unknown parameter names (yes, they can be parsed but...) on the command line
364 if self.opts.input_tab:
365 a('--INPATHS "%s"' % (self.infile_paths))
366 a('--INNAMES "%s"' % (self.infile_names))
367 if self.opts.output_tab:
368 a('--OUTPATH "%s"' % self.opts.output_tab)
369 for p in opts.additional_parameters:
370 p = p.replace('"','')
371 psplit = p.split(',')
372 param = html_unescape(psplit[0])
373 value = html_unescape(psplit[1])
374 a('%s="%s"' % (param,value))
375 if (self.opts.interpreter == 'Rscript'):
376 # pass params on command line
377 if self.opts.input_tab:
378 a('INPATHS="%s"' % self.infile_paths)
379 a('INNAMES="%s"' % self.infile_names)
380 if self.opts.output_tab:
381 a('OUTPATH="%s"' % self.opts.output_tab)
382 for p in opts.additional_parameters:
383 p = p.replace('"','')
384 psplit = p.split(',')
385 param = html_unescape(psplit[0])
386 value = html_unescape(psplit[1])
387 a('%s="%s"' % (param,value))
388 if (self.opts.interpreter == 'perl'):
389 # pass params on command line
390 if self.opts.input_tab:
391 a('%s' % self.infile_paths)
392 a('%s' % self.infile_names)
393 if self.opts.output_tab:
394 a('%s' % self.opts.output_tab)
395 for p in opts.additional_parameters:
396 p = p.replace('"','')
397 psplit = p.split(',')
398 param = html_unescape(psplit[0])
399 value = html_unescape(psplit[1])
400 if (value.find(' ') <> -1):
401 a('%s="%s"' % (param,value))
402 else:
403 a('%s=%s' % (param,value))
404 if self.opts.interpreter == 'sh' or self.opts.interpreter == 'bash':
405 # more is better - now move all params into environment AND drop on to command line.
406 self.cl.insert(0,'env')
407 if self.opts.input_tab:
408 self.cl.insert(1,'INPATHS=%s' % (self.infile_paths))
409 self.cl.insert(2,'INNAMES=%s' % (self.infile_names))
410 if self.opts.output_tab:
411 self.cl.insert(3,'OUTPATH=%s' % (self.opts.output_tab))
412 a('OUTPATH=%s' % (self.opts.output_tab))
413 # sets those environment variables for the script
414 # additional params appear in CL - yes, it's confusing
415 for i,p in enumerate(opts.additional_parameters):
416 psplit = p.split(',')
417 param = html_unescape(psplit[0])
418 value = html_unescape(psplit[1])
419 if (value.find(' ') <> -1):
420 a('%s="%s"' % (param,value))
421 self.cl.insert(4+i,'%s="%s"' % (param,value))
422 else:
423 a('%s=%s' % (param,value))
424 self.cl.insert(4+i,'%s=%s' % (param,value))
425 self.interp_owner = None
426 self.interp_pack = None
427 self.interp_revision = None
428 self.interp_version = None
429 if opts.envshpath <> 'system': # need to parse out details for our tool_dependency
430 try:
431 packdetails = opts.envshpath.split(os.path.sep)[-4:-1] # eg ['fubar', 'package_r_3_1_1', '63cdb9b2234c']
432 self.interpreter_owner = packdetails[0]
433 self.interpreter_pack = packdetails[1]
434 self.interpreter_name = packdetails[1].split('_')[1].upper()
435 self.interpreter_revision = packdetails[2]
436 self.interpreter_version = '.'.join(self.interpreter_pack.split('_')[2:])
437 # hope our naming convention as at jan 2015 = package_[interp]_v0_v1_v2... = version v0.v1.v2.. is in play
438 except:
439 pass
440 self.outFormats = opts.output_format
441 self.inputFormats = opts.input_formats
442 self.test1Output = '%s_test1_output.xls' % self.toolname
443 self.test1HTML = '%s_test1_output.html' % self.toolname
444
def makeXML(self):
    """
    Create a Galaxy xml tool wrapper for the new script and write it to
    self.xmlfile in the current directory.
    fixme - use templating or something less fugly than this.

    The templates set up in __init__ are filled from a dict (xdict) whose
    keys match the template parameters - inputs, outputs, help, script,
    tests, citations and so on. The result looks roughly like:

    <tool id="reverse" name="reverse" version="0.01">
    <description>a tabular file</description>
    <command interpreter="python">
    reverse.py --script_path "$runMe" --interpreter "python"
    --tool_name "reverse" ...inputs... ...outputs...
    </command>
    <inputs> ...input files, additional parameters and job_name... </inputs>
    <outputs> ...html_file and/or output1... </outputs>
    <configfiles>
    <configfile name="runMe">
    ...the escaped script text...
    </configfile>
    </configfiles>
    <tests> ... </tests>
    <help> ...supplied help followed by the script and attribution... </help>
    <citations> ... </citations>
    </tool>
    """

    # these templates need a dict with the right keys to match the parameters - outputs, help, code...

    xdict = {}
    xdict['additionalParams'] = ''
    xdict['additionalInputs'] = ''
    if self.opts.additional_parameters:
        # each additional parameter is "name,value,label,help,type"
        splitparams = [x.split(',') for x in self.opts.additional_parameters]
        if self.opts.edit_additional_parameters: # add to new tool form with default value set to original value
            xdict['additionalInputs'] = '\n'.join(['<param name="%s" value="%s" label="%s" help="%s" type="%s"/>' % \
                (f[0],html_escape(f[1]),html_escape(f[2]),html_escape(f[3]),f[4]) for f in splitparams])
        xdict['additionalParams'] = '\n'.join(['<param name="%s" value="%s" />' % (f[0],html_escape(f[1])) for f in splitparams])
    # getattr so a runner whose interpreter details were never parsed
    # (eg the system interpreter) still produces a tool
    xdict['interpreter_owner'] = getattr(self,'interpreter_owner',None)
    xdict['interpreter_version'] = getattr(self,'interpreter_version',None)
    xdict['interpreter_pack'] = getattr(self,'interpreter_pack',None)
    xdict['interpreter_name'] = getattr(self,'interpreter_name',None)
    xdict['requirements'] = ''
    if self.opts.include_dependencies == "yes":
        if self.opts.envshpath != 'system':
            xdict['requirements'] = self.protorequirements_interpreter % xdict
        else:
            xdict['requirements'] = self.protorequirements
    xdict['tool_version'] = self.opts.tool_version
    xdict['test1HTML'] = self.test1HTML
    xdict['test1Output'] = self.test1Output
    xdict['test1Inputs'] = self.test1Inputs
    if self.opts.make_HTML and self.opts.output_tab:
        xdict['tooltests'] = self.tooltestsBoth % xdict
    elif self.opts.make_HTML:
        xdict['tooltests'] = self.tooltestsHTMLOnly % xdict
    else:
        xdict['tooltests'] = self.tooltestsTabOnly % xdict
    xdict['script'] = self.escapedScript
    # configfile is least painful way to embed script to avoid external dependencies
    # but requires escaping of <, > and $ to avoid Mako parsing
    if self.opts.help_text:
        helpfile = open(self.opts.help_text,'r')
        try:
            helptext = helpfile.readlines()
        finally:
            helpfile.close()
        # must html escape here too - thanks to Marius van den Beek
        xdict['help'] = ''.join([html_escape(x) for x in helptext])
    else:
        xdict['help'] = 'Please ask the tool author (%s) for help as none was supplied at tool generation\n' % (self.opts.user_email)
    coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::']
    coda.append('\n')
    coda.append(self.indentedScript)
    coda.append('\n**Attribution**\nThis Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.\n' % (self.opts.user_email,timenow()))
    coda.append('See %s for details of that project' % (toolFactoryURL))
    coda.append('Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team. ')
    coda.append('Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573\n')
    xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda))
    if self.opts.tool_desc:
        xdict['tooldesc'] = '<description>%s</description>' % self.opts.tool_desc
    else:
        xdict['tooldesc'] = ''
    xdict['command_outputs'] = ''
    xdict['outputs'] = ''
    if self.opts.input_tab:
        cins = ['\n',]
        cins.append('--input_formats %s' % self.opts.input_formats)
        # loop over the param actually declared below (input_tab) - the
        # template used to refer to $input1 which the generated tool never defines
        cins.append('#for intab in $input_tab:')
        cins.append('--input_tab "${intab},${intab.name}"')
        cins.append('#end for\n')
        xdict['command_inputs'] = '\n'.join(cins)
        xdict['inputs'] = '''<param name="input_tab" multiple="true" type="data" format="%s" label="Select one or more %s input files from your history"
help="Multiple inputs may be selected assuming the script can deal with them..."/> \n''' % (self.inputFormats,self.inputFormats)
    else:
        xdict['command_inputs'] = '' # assume no input - eg a random data generator
        xdict['inputs'] = ''
    if self.opts.additional_parameters:
        cins = ['\n',]
        for params in self.opts.additional_parameters:
            psplit = params.split(',') # name,value...
            psplit[3] = html_escape(psplit[3])
            if self.opts.edit_additional_parameters:
                psplit[1] = '$%s' % psplit[0] # replace with form value
            else:
                psplit[1] = html_escape(psplit[1]) # leave prespecified value
            cins.append('--additional_parameters """%s"""' % ','.join(psplit))
        xdict['command_inputs'] = '%s\n%s' % (xdict['command_inputs'],'\n'.join(cins))
    xdict['inputs'] += '<param name="job_name" type="text" size="60" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname
    xdict['toolname'] = self.toolname
    xdict['toolid'] = self.toolid
    xdict['interpreter'] = self.opts.interpreter
    xdict['scriptname'] = self.sfile
    if self.opts.make_HTML:
        xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"'
        xdict['outputs'] += ' <data format="html" name="html_file" label="${job_name}.html"/>\n'
    else:
        xdict['command_outputs'] += ' --output_dir "./"'
    if self.opts.output_tab:
        xdict['command_outputs'] += ' --output_tab "$output1"'
        xdict['outputs'] += ' <data format="%s" name="output1" label="${job_name}"/>\n' % self.outFormats
    xdict['command'] = self.newCommand % xdict
    if self.opts.citations:
        citfile = open(self.opts.citations,'r')
        try:
            citationstext = citfile.read()
        finally:
            citfile.close()
        citation_tuples = parse_citations(citationstext)
        xdict['citations'] = ''.join(["""<citation type="%s">%s</citation>""" % (citation_type, html_escape(citation_content))
                                      for citation_type, citation_content in citation_tuples])
    else:
        xdict['citations'] = ""
    xmls = self.newXML % xdict
    xf = open(self.xmlfile,'w')
    try:
        xf.write(xmls)
        xf.write('\n')
    finally:
        xf.close()
    # ready for the tarball
601
602
def makeTooltar(self):
    """
    Run the script and, if it succeeds, package it as a new tool.

    a tool is a gz tarball with eg
    /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...

    The tarball is built in a directory named after the tool, copied to
    opts.new_tool and the working directory is then removed. The process
    exits with status 1 if the script run fails. Returns the (zero) return
    value of the run.
    """
    retval = self.run()
    if retval:
        sys.stderr.write('## Run failed. Cannot build yet. Please fix and retry\n')
        sys.exit(1)
    tdir = self.toolname
    os.mkdir(tdir)
    self.makeXML()
    if self.opts.help_text:
        helpfile = open(self.opts.help_text,'r')
        try:
            hlp = helpfile.read()
        finally:
            helpfile.close()
    else:
        hlp = 'Please ask the tool author for help as none was supplied at tool generation\n'
    # every key used by the dependency templates must be present -
    # interpreter_pack was missing, which made the interpreter template fail
    readme_dict = {'readme':hlp,'interpreter':self.opts.interpreter,
        'interpreter_version':getattr(self,'interpreter_version',None),
        'interpreter_name':getattr(self,'interpreter_name',None),
        'interpreter_owner':getattr(self,'interpreter_owner',None),
        'interpreter_pack':getattr(self,'interpreter_pack',None)}
    if self.opts.include_dependencies == "yes":
        if self.opts.envshpath == 'system':
            tooldepcontent = self.toolhtmldepskel % readme_dict
        else:
            tooldepcontent = self.toolhtmldepinterpskel % readme_dict
    else:
        tooldepcontent = self.emptytoolhtmldepskel % readme_dict
    depf = open(os.path.join(tdir,'tool_dependencies.xml'),'w')
    try:
        depf.write(tooldepcontent)
        depf.write('\n')
    finally:
        depf.close()
    if self.opts.input_tab: # no reproducible test otherwise? TODO: maybe..
        testdir = os.path.join(tdir,'test-data')
        os.mkdir(testdir) # make tests directory
        for intab in self.opts.input_tab:
            si = intab.split(',')[0] # entries are "path,name" - only the path matters here
            dest = os.path.join(testdir,os.path.basename(si))
            if si != dest:
                shutil.copyfile(si,dest)
        if self.opts.output_tab:
            shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output))
        if self.opts.make_HTML:
            shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML))
        if self.opts.output_dir:
            shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log'))
    outpif = '%s.py' % self.toolname # new name
    outpiname = os.path.join(tdir,outpif) # path for the tool tarball
    pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM)
    notes = ['# %s - a self annotated version of %s generated by running %s\n' % (outpiname,pyin,pyin),]
    notes.append('# to make a new Galaxy tool called %s\n' % self.toolname)
    notes.append('# User %s at %s\n' % (self.opts.user_email,timenow()))
    pyfile = open(self.pyfile,'r') # our code becomes new tool wrapper (!) - first Galaxy worm
    try:
        notes += pyfile.readlines()
    finally:
        pyfile.close()
    outpi = open(outpiname,'w')
    try:
        outpi.write(''.join(notes))
        outpi.write('\n')
    finally:
        outpi.close()
    # NOTE(review): when self.sfile is an absolute path, os.path.join returns
    # it unchanged so nothing is copied here - confirm whether that is intended
    stname = os.path.join(tdir,self.sfile)
    if not os.path.exists(stname):
        shutil.copyfile(self.sfile, stname)
    xtname = os.path.join(tdir,self.xmlfile)
    if not os.path.exists(xtname):
        shutil.copyfile(self.xmlfile,xtname)
    tarpath = "%s.tar.gz" % self.toolname
    tar = tarfile.open(tarpath, "w:gz")
    try:
        tar.add(tdir,arcname='%s' % self.toolname)
    finally:
        tar.close()
    shutil.copyfile(tarpath,self.opts.new_tool)
    shutil.rmtree(tdir)
    ## TODO: replace with optional direct upload to local toolshed?
    return retval
675
676
def compressPDF(self,inpdf=None,thumbformat='png'):
    """
    Compress a pdf in place with ghostscript and make a thumbnail image of it.

    inpdf must be the absolute path to an existing pdf; an AssertionError is
    raised if it is not a file. thumbformat is the extension (and so the
    format) of the thumbnail written next to the pdf.

    note that GS gets confoozled if no $TMP or $TEMP
    so we set $TEMP to a tmp directory under the output directory if needed.

    Logs for each step are written to the output directory and removed again
    when that step succeeds. Returns 0 if both steps succeeded, otherwise the
    first non zero exit status.
    """
    if not (inpdf and os.path.isfile(inpdf)):
        # self.myname is the attribute set in __init__ (this used to read self.myName)
        raise AssertionError("## Input %s supplied to %s compressPDF not found" % (inpdf,self.myname))
    hlog = os.path.join(self.opts.output_dir,"compress_%s.txt" % os.path.basename(inpdf))
    our_env = os.environ.copy()
    our_tmp = our_env.get('TMP',None)
    if not our_tmp:
        our_tmp = our_env.get('TEMP',None)
    outpdf = '%s_compressed' % inpdf
    sto = open(hlog,'a')
    try:
        if not (our_tmp and os.path.exists(our_tmp)):
            newtmp = os.path.join(self.opts.output_dir,'tmp')
            try:
                os.mkdir(newtmp)
            except OSError:
                sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
            our_env['TEMP'] = newtmp
            if not self.temp_warned:
                sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
                self.temp_warned = True
        cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH","-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf,inpdf]
        x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
        retval1 = x.wait()
    finally:
        sto.close()
    if retval1 == 0:
        # swap the compressed copy in for the original and drop the log
        os.unlink(inpdf)
        shutil.move(outpdf,inpdf)
        os.unlink(hlog)
    hlog = os.path.join(self.opts.output_dir,"thumbnail_%s.txt" % os.path.basename(inpdf))
    outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
    if self.useGM:
        cl2 = ['gm', 'convert', inpdf, outpng]
    else: # assume imagemagick
        cl2 = ['convert', inpdf, outpng]
    sto = open(hlog,'w')
    try:
        x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
        retval2 = x.wait()
    finally:
        sto.close()
    if retval2 == 0:
        os.unlink(hlog)
    retval = retval1 or retval2
    return retval
722
723
def getfSize(self,fpath,outpath):
    """
    Describe the size of the file fpath inside directory outpath.

    Returns an empty string when the joined path is not a regular file,
    otherwise a short string in B, KB or MB ('0 B' for an empty file).
    """
    target = os.path.join(outpath,fpath)
    if not os.path.isfile(target):
        return ''
    nbytes = float(os.path.getsize(target))
    mega = 2**20
    kilo = 2**10
    if nbytes > mega:
        return '%1.1f MB' % (nbytes/mega)
    if nbytes > kilo:
        return '%1.1f KB' % (nbytes/kilo)
    if nbytes > 0:
        return '%d B' % (int(nbytes))
    return '0 B'
740
741 def makeHtml(self):
742 """ Create an HTML file content to list all the artifacts found in the output_dir
743 """
744
745 galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
746 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
747 <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
748 <meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
749 <title></title>
750 <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
751 </head>
752 <body>
753 <div class="toolFormBody">
754 """
755 galhtmlattr = """<hr/><div class="infomessage">This tool (%s) was generated by the <a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory</a></div><br/>"""
756 galhtmlpostfix = """</div></body></html>\n"""
757
758 flist = os.listdir(self.opts.output_dir)
759 flist = [x for x in flist if x <> 'Rplots.pdf']
760 flist.sort()
761 html = []
762 html.append(galhtmlprefix % progname)
763 html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.toolname,timenow()))
764 fhtml = []
765 if len(flist) > 0:
766 logfiles = [x for x in flist if x.lower().endswith('.log')] # log file names determine sections
767 logfiles.sort()
768 logfiles = [x for x in logfiles if os.path.abspath(x) <> os.path.abspath(self.tlog)]
769 logfiles.append(os.path.abspath(self.tlog)) # make it the last one
770 pdflist = []
771 npdf = len([x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'])
772 for rownum,fname in enumerate(flist):
773 dname,e = os.path.splitext(fname)
774 sfsize = self.getfSize(fname,self.opts.output_dir)
775 if e.lower() == '.pdf' : # compress and make a thumbnail
776 thumb = '%s.%s' % (dname,self.thumbformat)
777 pdff = os.path.join(self.opts.output_dir,fname)
778 retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
779 if retval == 0:
780 pdflist.append((fname,thumb))
781 else:
782 pdflist.append((fname,fname))
783 if (rownum+1) % 2 == 0:
784 fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
785 else:
786 fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
787 for logfname in logfiles: # expect at least tlog - if more
788 if os.path.abspath(logfname) == os.path.abspath(self.tlog): # handled later
789 sectionname = 'All tool run'
790 if (len(logfiles) > 1):
791 sectionname = 'Other'
792 ourpdfs = pdflist
793 else:
794 realname = os.path.basename(logfname)
795 sectionname = os.path.splitext(realname)[0].split('_')[0] # break in case _ added to log
796 ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
797 pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] <> sectionname] # remove
798 nacross = 1
799 npdf = len(ourpdfs)
800
801 if npdf > 0:
802 nacross = math.sqrt(npdf) ## int(round(math.log(npdf,2)))
803 if int(nacross)**2 != npdf:
804 nacross += 1
805 nacross = int(nacross)
806 width = min(400,int(1200/nacross))
807 html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname)
808 html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>')
809 ntogo = nacross # counter for table row padding with empty cells
810 html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
811 for i,paths in enumerate(ourpdfs):
812 fname,thumb = paths
813 s= """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"
814 alt="Image called %s"/></a></td>\n""" % (fname,thumb,fname,width,fname)
815 if ((i+1) % nacross == 0):
816 s += '</tr>\n'
817 ntogo = 0
818 if i < (npdf - 1): # more to come
819 s += '<tr>'
820 ntogo = nacross
821 else:
822 ntogo -= 1
823 html.append(s)
824 if html[-1].strip().endswith('</tr>'):
825 html.append('</table></div>\n')
826 else:
827 if ntogo > 0: # pad
828 html.append('<td>&nbsp;</td>'*ntogo)
829 html.append('</tr></table></div>\n')
830 logt = open(logfname,'r').readlines()
831 logtext = [x for x in logt if x.strip() > '']
832 html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
833 if len(logtext) > 1:
834 html.append('\n<pre>\n')
835 html += logtext
836 html.append('\n</pre>\n')
837 else:
838 html.append('%s is empty<br/>' % logfname)
839 if len(fhtml) > 0:
840 fhtml.insert(0,'<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
841 fhtml.append('</table></div><br/>')
842 html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
843 html += fhtml # add all non-pdf files to the end of the display
844 else:
845 html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
846 html.append(galhtmlpostfix)
847 htmlf = file(self.opts.output_html,'w')
848 htmlf.write('\n'.join(html))
849 htmlf.write('\n')
850 htmlf.close()
851 self.html = html
852
853
854
def run(self):
    """
    Run the generated command line (self.cl) and return its exit status.

    scripts must be small enough not to fill the pipe!

    If opts.envshpath is anything other than 'system' it is handed to
    shell_source() first (presumably to load that environment - confirm
    against shell_source). bash/sh scripts are delegated to runBash() when
    self.treatbashSpecial is set; runBash() does its own logging and HTML
    generation, so this method returns its result directly.

    Otherwise, when opts.output_dir is set the child runs in that directory
    with stdout captured in self.tlog and stderr in self.elog; a non-zero
    exit with any captured stderr is echoed to this process's stderr.
    Without an output_dir the child inherits this process's streams.
    makeHtml() is called afterwards when opts.make_HTML is set.

    Returns the child's exit status from Popen.wait().
    """
    if self.opts.envshpath != 'system':
        shell_source(self.opts.envshpath)
    if self.treatbashSpecial and self.opts.interpreter in ['bash', 'sh']:
        return self.runBash()
    if self.opts.output_dir:
        # context managers close both logs even if Popen itself raises
        with open(self.elog, 'w') as ste:
            with open(self.tlog, 'w') as sto:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()  # header must hit the file before the child starts appending
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=ste, cwd=self.opts.output_dir)
                retval = p.wait()
        with open(self.elog, 'r') as errlog:
            err = errlog.readlines()
        if retval != 0 and err:  # problem
            # same text the old "print >> sys.stderr, err" produced: the list repr plus newline
            sys.stderr.write(str(err) + '\n')
    else:
        retval = subprocess.Popen(self.cl, shell=False).wait()
    if self.opts.make_HTML:
        self.makeHtml()
    return retval
882
def runBash(self):
    """
    Run self.cl for a bash/sh script and return its exit status.

    cannot use - for bash so use self.sfile
    (self.cl is built elsewhere; presumably it names the script file
    rather than feeding the script on stdin - confirm where cl is made)

    When opts.output_dir is set the child runs in that directory and both
    its stdout and stderr are written to the single log self.tlog, after a
    header line recording the command. Without an output_dir the child
    inherits this process's streams. makeHtml() is called afterwards when
    opts.make_HTML is set.

    Returns the child's exit status from Popen.wait().
    """
    if self.opts.output_dir:
        s = '## Toolfactory generated command line = %s\n' % ' '.join(self.cl)
        # the with block closes the log even if Popen raises
        with open(self.tlog, 'w') as sto:
            sto.write(s)
            sto.flush()  # header must hit the file before the child starts appending
            p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=sto, cwd=self.opts.output_dir)
            retval = p.wait()
    else:
        retval = subprocess.Popen(self.cl, shell=False).wait()
    if self.opts.make_HTML:
        self.makeHtml()
    return retval
901
902
def main():
    """
    Command line entry point: parse options, validate them and run the script.

    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
    </command>

    Validation failures (a --bad_user value, missing --tool_name or
    --interpreter, or a --script_path that is not an existing file) end the
    process via sys.exit(message), so the message goes to stderr with exit
    status 1. These are explicit checks rather than assert statements
    because asserts are removed when Python runs with -O, which would
    silently disable the authorisation check.

    After validation a ScriptRunner is built from the options; it either
    makes a tool archive (--make_Tool) or runs the script, then r.sfile is
    removed. A non-zero return code is passed to sys.exit.
    """
    op = optparse.OptionParser()
    a = op.add_option
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default='./')
    a('--output_html', default=None)
    a('--input_tab', default=[], action="append")  # these are "galaxypath,metadataname" pairs
    a("--input_formats", default="tabular")
    a('--output_tab', default=None)
    a('--output_format', default='tabular')
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_Tool', default=None)
    a('--make_HTML', default=None)
    a('--help_text', default=None)
    a('--tool_desc', default=None)
    a('--new_tool', default=None)
    a('--tool_version', default=None)
    a('--include_dependencies', default="yes")
    a('--citations', default=None)
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    a('--edit_additional_parameters', action="store_true", default=False)
    a('--envshpath', default="system")
    opts, args = op.parse_args()
    if opts.bad_user:
        sys.exit('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user))
    if not opts.tool_name:
        sys.exit('## Tool Factory expects a tool name - eg --tool_name=DESeq')
    if not opts.interpreter:
        sys.exit('## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
    # test for a value first: os.path.isfile(None) raises TypeError instead of returning False
    if not (opts.script_path and os.path.isfile(opts.script_path)):
        sys.exit('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # an already existing directory is expected; anything else is a real failure
            if not os.path.isdir(opts.output_dir):
                raise
    opts.input_tab = [x.replace('"', '').replace("'", '') for x in opts.input_tab]
    # remove quotes we need to deal with spaces in CL params
    opts.additional_parameters = [x.replace('"', '') for x in opts.additional_parameters]
    r = ScriptRunner(opts)
    if opts.make_Tool:
        retcode = r.makeTooltar()
    else:
        retcode = r.run()
    os.unlink(r.sfile)
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner


if __name__ == "__main__":
    main()
958
959