comparison fubar-galaxytoolfactory-ca7db160878a/rgToolFactory.py @ 3:8c578211a681 draft

Fixed nasty silly bug - fixed locally but not previously propagated
author fubar
date Fri, 31 Aug 2012 23:04:13 -0400
parents
children
comparison
equal deleted inserted replaced
2:b55b59435fb1 3:8c578211a681
1 # rgToolFactory.py
2 # see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
3 #
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
5 #
6 # all rights reserved
7 # Licensed under the LGPL
8 # suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
9
10 # August 11 2012
11 # changed to use shell=False and cl as a sequence
12
13 # This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
14 # It also serves as the wrapper for the new tool.
15 #
16 # you paste and run your script
17 # Only works for simple scripts that read one input from the history.
18 # Optionally can write one new history dataset,
19 # and optionally collect any number of outputs into links on an autogenerated HTML page.
20
21 # DO NOT install on a public or important site - please.
22
23 # installed generated tools are fine if the script is safe.
24 # They just run normally and their user cannot do anything unusually insecure
25 # but please, practice safe toolshed.
26 # Read the fucking code before you install any tool
27 # especially this one
28
29 # After you get the script working on some test data, you can
30 # optionally generate a toolshed compatible gzip file
31 # containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
32 # safe and largely automated installation in a production Galaxy.
33
34 # If you opt for an HTML output, you get all the script outputs arranged
35 # as a single Html history item - all output files are linked, thumbnails for all the pdfs.
36 # Ugly but really inexpensive.
37 #
38 # Patches appreciated please.
39 #
40 #
41 # long route to June 2012 product
42 # Behold the awesome power of Galaxy and the toolshed with the tool factory binds to bind them
43 # derived from an integrated script model
44 # called rgBaseScriptWrapper.py
45 # Note to the unwary:
46 # This tool allows arbitrary scripting on your Galaxy as the Galaxy user
47 # There is nothing stopping a malicious user doing whatever they choose
48 # Extremely dangerous!!
49 # Totally insecure. So, trusted users only
50 #
51 # preferred model is a developer using their throw away workstation instance - ie a private site.
52 # no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
53 #
54
55 import sys
56 import shutil
57 import subprocess
58 import os
59 import time
60 import tempfile
61 import optparse
62 import tarfile
63 import re
64 import shutil
65 import math
66
# Module-level settings shared by the functions and classes in this file.

# Basename of the running script; stamped into the generated HTML report header.
progname = os.path.basename(sys.argv[0])
myversion = 'V000.2 June 2012'
# Diagnostic switches; both are off by default.
verbose = False
debug = False
# Project home page, quoted in the help text of every generated tool.
toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'
72
def timenow():
    """Return the current local time as a 'dd/mm/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    right_now = time.localtime()  # no argument means "now"
    return time.strftime(stamp_format, right_now)
77
78
79 class ScriptRunner:
80 """class is a wrapper for an arbitrary script
81 """
82
83 def __init__(self,opts=None,treatbashSpecial=True):
84 """
85 cleanup inputs, setup some outputs
86
87 """
88 self.treatbashSpecial = treatbashSpecial
89 if opts.output_dir: # simplify for the tool tarball
90 os.chdir(opts.output_dir)
91 self.thumbformat = 'jpg'
92 self.opts = opts
93 self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but..
94 self.toolid = self.toolname
95 self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
96 self.pyfile = self.myname # crude but efficient - the cruft won't hurt much
97 self.xmlfile = '%s.xml' % self.toolname
98 s = open(self.opts.script_path,'r').readlines()
99 s = [x.rstrip() for x in s] # remove pesky dos line endings if needed
100 self.script = '\n'.join(s)
101 fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname,suffix=".%s" % (opts.interpreter))
102 tscript = open(self.sfile,'w') # use self.sfile as script source for Popen
103 tscript.write(self.script)
104 tscript.close()
105 self.indentedScript = ''.join([' %s' % x for x in s]) # for restructured text in help
106 if opts.output_dir: # may not want these complexities
107 self.tlog = os.path.join(opts.output_dir,"%s_runner.log" % self.toolname)
108 art = '%s.%s' % (self.toolname,opts.interpreter)
109 artpath = os.path.join(self.opts.output_dir,art) # need full path
110 artifact = open(artpath,'w') # use self.sfile as script source for Popen
111 artifact.write(self.script)
112 artifact.close()
113 self.cl = []
114 self.html = []
115 a = self.cl.append
116 a(opts.interpreter)
117 if self.treatbashSpecial and opts.interpreter in ['bash','sh']:
118 a(self.sfile)
119 else:
120 a('-') # stdin
121 a(opts.input_tab)
122 a(opts.output_tab)
123 self.outFormats = 'tabular' # TODO make this an option at tool generation time
124 self.inputFormats = 'tabular' # TODO make this an option at tool generation time
125 self.test1Input = '%s_test1_input.xls' % self.toolname
126 self.test1Output = '%s_test1_output.xls' % self.toolname
127 self.test1HTML = '%s_test1_output.html' % self.toolname
128
129 def makeXML(self):
130 """
131 Create a Galaxy xml tool wrapper for the new script as a string to write out
132 fixme - use templating or something less fugly than this example of what we produce
133
134 <tool id="reverse" name="reverse" version="0.01">
135 <description>a tabular file</description>
136 <command interpreter="python">
137 reverse.py --script_path "$runMe" --interpreter "python"
138 --tool_name "reverse" --input_tab "$input1" --output_tab "$tab_file"
139 </command>
140 <inputs>
141 <param name="input1" type="data" format="tabular" label="Select a suitable input file from your history"/><param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="reverse"/>
142
143 </inputs>
144 <outputs>
145 <data format="tabular" name="tab_file" label="${job_name}"/>
146
147 </outputs>
148 <help>
149
150 **What it Does**
151
152 Reverse the columns in a tabular file
153
154 </help>
155 <configfiles>
156 <configfile name="runMe">
157
158 # reverse order of columns in a tabular file
159 import sys
160 inp = sys.argv[1]
161 outp = sys.argv[2]
162 i = open(inp,'r')
163 o = open(outp,'w')
164 for row in i:
165 rs = row.rstrip().split('\t')
166 rs.reverse()
167 o.write('\t'.join(rs))
168 o.write('\n')
169 i.close()
170 o.close()
171
172
173 </configfile>
174 </configfiles>
175 </tool>
176
177 """
178 newXML="""<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s">
179 %(tooldesc)s
180 %(command)s
181 <inputs>
182 %(inputs)s
183 </inputs>
184 <outputs>
185 %(outputs)s
186 </outputs>
187 <configfiles>
188 <configfile name="runMe">
189 %(script)s
190 </configfile>
191 </configfiles>
192 %(tooltests)s
193 <help>
194 %(help)s
195 </help>
196 </tool>""" # needs a dict with toolname, toolid, interpreter, scriptname, command, inputs as a multi line string ready to write, outputs ditto, help ditto
197
198 newCommand="""<command interpreter="python">
199 %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s"
200 --tool_name "%(toolname)s" %(command_inputs)s %(command_outputs)s
201 </command>""" # may NOT be an input or htmlout
202 tooltestsTabOnly = """<tests><test>
203 <param name="input1" value="%(test1Input)s" ftype="tabular"/>
204 <param name="job_name" value="test1"/>
205 <param name="runMe" value="$runMe"/>
206 <output name="tab_file" file="%(test1Output)s" ftype="tabular"/>
207 </test></tests>"""
208 tooltestsHTMLOnly = """<tests><test>
209 <param name="input1" value="%(test1Input)s" ftype="tabular"/>
210 <param name="job_name" value="test1"/>
211 <param name="runMe" value="$runMe"/>
212 <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="5"/>
213 </test></tests>"""
214 tooltestsBoth = """<tests><test>
215 <param name="input1" value="%(test1Input)s" ftype="tabular"/>
216 <param name="job_name" value="test1"/>
217 <param name="runMe" value="$runMe"/>
218 <output name="tab_file" file="%(test1Output)s" ftype="tabular" />
219 <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="10"/>
220 </test></tests>"""
221 xdict = {}
222 xdict['tool_version'] = self.opts.tool_version
223 xdict['test1Input'] = self.test1Input
224 xdict['test1HTML'] = self.test1HTML
225 xdict['test1Output'] = self.test1Output
226 if self.opts.make_HTML and self.opts.output_tab <> 'None':
227 xdict['tooltests'] = tooltestsBoth % xdict
228 elif self.opts.make_HTML:
229 xdict['tooltests'] = tooltestsHTMLOnly % xdict
230 else:
231 xdict['tooltests'] = tooltestsTabOnly % xdict
232 xdict['script'] = self.script # configfile is least painful way to embed script to avoid external dependencies
233 if self.opts.help_text:
234 xdict['help'] = open(self.opts.help_text,'r').read()
235 else:
236 xdict['help'] = 'Please ask the tool author for help as none was supplied at tool generation'
237 coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::']
238 coda.append(self.indentedScript)
239 coda.append('**Attribution** This Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.' % (self.opts.user_email,timenow()))
240 coda.append('See %s for details of that project' % (toolFactoryURL))
241 xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda))
242 if self.opts.tool_desc:
243 xdict['tooldesc'] = '<description>%s</description>' % self.opts.tool_desc
244 else:
245 xdict['tooldesc'] = ''
246 xdict['command_outputs'] = ''
247 xdict['outputs'] = ''
248 if self.opts.input_tab <> 'None':
249 xdict['command_inputs'] = '--input_tab "$input1" ' # the space may matter a lot if we append something
250 xdict['inputs'] = '<param name="input1" type="data" format="%s" label="Select a suitable input file from your history"/> \n' % self.inputFormats
251 else:
252 xdict['command_inputs'] = '' # assume no input - eg a random data generator
253 xdict['inputs'] = ''
254 xdict['inputs'] += '<param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname
255 xdict['toolname'] = self.toolname
256 xdict['toolid'] = self.toolid
257 xdict['interpreter'] = self.opts.interpreter
258 xdict['scriptname'] = self.sfile
259 if self.opts.make_HTML:
260 xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes" '
261 xdict['outputs'] += ' <data format="html" name="html_file" label="${job_name}.html"/>\n'
262 if self.opts.output_tab <> 'None':
263 xdict['command_outputs'] += ' --output_tab "$tab_file"'
264 xdict['outputs'] += ' <data format="%s" name="tab_file" label="${job_name}"/>\n' % self.outFormats
265 xdict['command'] = newCommand % xdict
266 xmls = newXML % xdict
267 xf = open(self.xmlfile,'w')
268 xf.write(xmls)
269 xf.write('\n')
270 xf.close()
271 # ready for the tarball
272
273
274 def makeTooltar(self):
275 """
276 a tool is a gz tarball with eg
277 /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...
278 """
279 retval = self.run()
280 if retval:
281 print >> sys.stderr,'## Run failed. Cannot build yet. Please fix and retry'
282 sys.exit(1)
283 self.makeXML()
284 tdir = self.toolname
285 os.mkdir(tdir)
286 if self.opts.input_tab <> 'None': # no reproducible test otherwise? TODO: maybe..
287 testdir = os.path.join(tdir,'test-data')
288 os.mkdir(testdir) # make tests directory
289 shutil.copyfile(self.opts.input_tab,os.path.join(testdir,self.test1Input))
290 if self.opts.output_tab <> 'None':
291 shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output))
292 if self.opts.make_HTML:
293 shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML))
294 if self.opts.output_dir:
295 shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log'))
296 op = '%s.py' % self.toolname # new name
297 outpiname = os.path.join(tdir,op) # path for the tool tarball
298 pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM)
299 notes = ['# %s - a self annotated version of %s generated by running %s\n' % (op,pyin,pyin),]
300 notes.append('# to make a new Galaxy tool called %s\n' % self.toolname)
301 notes.append('# User %s at %s\n' % (self.opts.user_email,timenow()))
302 pi = open(self.pyfile,'r').readlines() # our code becomes new tool wrapper (!) - first Galaxy worm
303 notes += pi
304 outpi = open(outpiname,'w')
305 outpi.write(''.join(notes))
306 outpi.write('\n')
307 outpi.close()
308 stname = os.path.join(tdir,self.sfile)
309 if not os.path.exists(stname):
310 shutil.copyfile(self.sfile, stname)
311 xtname = os.path.join(tdir,self.xmlfile)
312 if not os.path.exists(xtname):
313 shutil.copyfile(self.xmlfile,xtname)
314 tarpath = "%s.gz" % self.toolname
315 tar = tarfile.open(tarpath, "w:gz")
316 tar.add(tdir,arcname=self.toolname)
317 tar.close()
318 shutil.copyfile(tarpath,self.opts.new_tool)
319 shutil.rmtree(tdir)
320 ## TODO: replace with optional direct upload to local toolshed?
321 return retval
322
323 def compressPDF(self,inpdf=None,thumbformat='png'):
324 """need absolute path to pdf
325 """
326 assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName)
327 hf,hlog = tempfile.mkstemp(suffix="%s.log" % self.toolname)
328 sto = open(hlog,'w')
329 outpdf = '%s_compressed' % inpdf
330 cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf,inpdf]
331 x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
332 retval1 = x.wait()
333 if retval1 == 0:
334 os.unlink(inpdf)
335 shutil.move(outpdf,inpdf)
336 outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
337 cl2 = ['convert', inpdf, outpng]
338 x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
339 retval2 = x.wait()
340 sto.close()
341 retval = retval1 or retval2
342 return retval
343
344
345 def getfSize(self,fpath,outpath):
346 """
347 format a nice file size string
348 """
349 size = ''
350 fp = os.path.join(outpath,fpath)
351 if os.path.isfile(fp):
352 size = '0 B'
353 n = float(os.path.getsize(fp))
354 if n > 2**20:
355 size = '%1.1f MB' % (n/2**20)
356 elif n > 2**10:
357 size = '%1.1f KB)' % (n/2**10)
358 elif n > 0:
359 size = '%d B' % (int(n))
360 return size
361
362 def makeHtml(self):
363 """ Create an HTML file content to list all the artifacts found in the output_dir
364 """
365
366 galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
367 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
368 <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
369 <meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
370 <title></title>
371 <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
372 </head>
373 <body>
374 <div class="toolFormBody">
375 """
376 galhtmlattr = """<hr/><div class="infomessage">This tool (%s) was generated by the <a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory</a></div><br/>"""
377 galhtmlpostfix = """</div></body></html>\n"""
378
379 flist = os.listdir(self.opts.output_dir)
380 flist = [x for x in flist if x <> 'Rplots.pdf']
381 flist.sort()
382 html = []
383 html.append(galhtmlprefix % progname)
384 html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.toolname,timenow()))
385 fhtml = []
386 if len(flist) > 0:
387 pdflist = []
388 npdf = len([x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'])
389 nacross = 1
390 if npdf > 0:
391 nacross = int(round(math.log(npdf,2)))
392 nacross = max(1,nacross)
393 width = min(400,int(1200/nacross))
394 for rownum,fname in enumerate(flist):
395 dname,e = os.path.splitext(fname)
396 sfsize = self.getfSize(fname,self.opts.output_dir)
397 if e.lower() == '.pdf' : # compress and make a thumbnail
398 thumb = '%s.%s' % (dname,self.thumbformat)
399 pdff = os.path.join(self.opts.output_dir,fname)
400 retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
401 if retval == 0:
402 pdflist.append((fname,thumb))
403 if (rownum+1) % 2 == 0:
404 fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
405 else:
406 fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
407 ntogo = nacross # counter for table row padding with empty cells
408 if len(pdflist) > 0:
409 html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
410 for i,paths in enumerate(pdflist):
411 fname,thumb = paths
412 s= """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"
413 alt="Image called %s"/></a></td>\n""" % (fname,thumb,fname,width,fname)
414 if ((i+1) % nacross == 0):
415 s += '</tr>\n'
416 ntogo = 0
417 if i < (npdf - 1): # more to come
418 s += '<tr>'
419 ntogo = nacross
420 else:
421 ntogo -= 1
422 html.append(s)
423 if html[-1].strip().endswith('</tr>'):
424 html.append('</table></div>\n')
425 else:
426 if ntogo > 0: # pad
427 html.append('<td>&nbsp;</td>'*ntogo)
428 html.append('</tr></table></div>\n')
429 if len(fhtml) > 0:
430 fhtml.insert(0,'<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
431 fhtml.append('</table></div><br/>')
432 html += fhtml # add all non-pdf files to the end of the display
433 else:
434 html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
435 rlog = open(self.tlog,'r').readlines()
436 rlog = [x for x in rlog if x.strip() > '']
437 if len(rlog) > 1:
438 html.append('<div class="toolFormTitle">%s log</div><pre>\n' % self.opts.interpreter)
439 html += rlog
440 html.append('</pre>\n')
441 html.append(galhtmlattr % (self.toolname))
442 html.append(galhtmlpostfix)
443 htmlf = file(self.opts.output_html,'w')
444 htmlf.write('\n'.join(html))
445 htmlf.write('\n')
446 htmlf.close()
447 self.html = html
448
449
450 def run(self):
451 """
452 scripts must be small enough not to fill the pipe!
453 """
454 if self.treatbashSpecial and self.opts.interpreter in ['bash','sh']:
455 retval = self.runBash()
456 else:
457 if self.opts.output_dir:
458 sto = open(self.tlog,'w')
459 sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
460 sto.flush()
461 p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
462 else:
463 p = subprocess.Popen(self.cl,shell=False,stdin=subprocess.PIPE)
464 p.stdin.write(self.script)
465 p.stdin.close()
466 retval = p.wait()
467 if self.opts.output_dir:
468 sto.close()
469 if self.opts.make_HTML:
470 self.makeHtml()
471 return retval
472
473 def runBash(self):
474 """
475 cannot use - for bash so use self.sfile
476 """
477 if self.opts.output_dir:
478 s = '## Toolfactory generated command line = %s\n' % ' '.join(self.cl)
479 sto = open(self.tlog,'w')
480 sto.write(s)
481 sto.flush()
482 p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
483 else:
484 p = subprocess.Popen(self.cl,shell=False)
485 retval = p.wait()
486 if self.opts.output_dir:
487 sto.close()
488 if self.opts.make_HTML:
489 self.makeHtml()
490 return retval
491
492
def main():
    """Parse the command line, run the script and optionally build a tool.

    Exits through ``sys.exit`` with an explanatory message on stderr when
    the caller is flagged as unauthorised or a required option is missing,
    and with the script's exit status when the run fails, so that the job
    runner sees the failure.  The temporary script file is always removed.
    """
    u = """
This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
<command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
</command>
"""
    op = optparse.OptionParser(usage=u)
    a = op.add_option
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default=None)
    a('--output_html', default=None)
    a('--input_tab', default="None")
    a('--output_tab', default="None")
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_Tool', default=None)
    a('--make_HTML', default=None)
    a('--help_text', default=None)
    a('--tool_desc', default=None)
    a('--new_tool', default=None)
    a('--tool_version', default=None)
    opts, args = op.parse_args()
    # Explicit checks rather than assert statements: asserts are removed
    # under "python -O", which would silently skip the authorisation test.
    if opts.bad_user:
        sys.exit('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user))
    if not opts.tool_name:
        sys.exit('## Tool Factory expects a tool name - eg --tool_name=DESeq')
    if not opts.interpreter:
        sys.exit('## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
    # Test for a missing option first; os.path.isfile(None) raises TypeError.
    if not (opts.script_path and os.path.isfile(opts.script_path)):
        sys.exit('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # An existing directory is fine; anything else is a real error.
            if not os.path.isdir(opts.output_dir):
                raise
    r = ScriptRunner(opts)
    try:
        if opts.make_Tool:
            retcode = r.makeTooltar()
        else:
            retcode = r.run()
    finally:
        # Remove the temporary script even when the run raises or exits.
        if os.path.exists(r.sfile):
            os.unlink(r.sfile)
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner


if __name__ == "__main__":
    main()
538
539