comparison rgToolFactory2.py @ 0:c34063ab3735 draft

Initial commit of code in iuc github repository
author fubar
date Thu, 01 Jan 2015 21:58:00 -0500
parents
children 6a3c292412fa
comparison
equal deleted inserted replaced
-1:000000000000 0:c34063ab3735
1 # rgToolFactoryMultIn.py
2 # see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
3 #
4 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
5 #
6 # all rights reserved
7 # Licensed under the LGPL
8 # suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
9 #
10 # sept 2014 added additional params from
11 # https://bitbucket.org/mvdbeek/dockertoolfactory/src/d4863bcf7b521532c7e8c61b6333840ba5393f73/DockerToolFactory.py?at=default
12 # passing them is complex
13 # and they are restricted to NOT contain commas or double quotes to ensure that they can be safely passed together on
14 # the toolfactory command line as a comma delimited double quoted string for parsing and passing to the script
15 # see examples on this tool form
16
17 # august 2014
18
19 # Allows arbitrary number of input files
20 # NOTE positional parameters are now passed to script
21 # and output (may be "None") is *before* arbitrary number of inputs
22 #
23 # march 2014
24 # had to remove dependencies because cross toolshed dependencies are not possible - can't pre-specify a toolshed url for graphicsmagick and ghostscript
25 # grrrrr - night before a demo
26 # added dependencies to a tool_dependencies.xml if html page generated so generated tool is properly portable
27 #
28 # added ghostscript and graphicsmagick as dependencies
29 # fixed a weird problem where gs was trying to use the new_files_path from universe (database/tmp) as ./database/tmp
30 # errors ensued
31 #
32 # august 2013
33 # found a problem with GS if $TMP or $TEMP missing - now inject /tmp and warn
34 #
35 # july 2013
36 # added ability to combine images and individual log files into html output
37 # just make sure there's a log file foo.log and it will be output
38 # together with all images named like "foo_*.pdf"
39 # otherwise old format for html
40 #
41 # January 2013
42 # problem pointed out by Carlos Borroto
43 # added escaping for <>$ - thought I did that ages ago...
44 #
45 # August 11 2012
46 # changed to use shell=False and cl as a sequence
47
48 # This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
49 # It also serves as the wrapper for the new tool.
50 #
51 # you paste and run your script
52 # Only works for simple scripts that read one input from the history.
53 # Optionally can write one new history dataset,
54 # and optionally collect any number of outputs into links on an autogenerated HTML page.
55
56 # DO NOT install on a public or important site - please.
57
58 # installed generated tools are fine if the script is safe.
59 # They just run normally and their user cannot do anything unusually insecure
60 # but please, practice safe toolshed.
61 # Read the fucking code before you install any tool
62 # especially this one
63
64 # After you get the script working on some test data, you can
65 # optionally generate a toolshed compatible gzip file
66 # containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
67 # safe and largely automated installation in a production Galaxy.
68
69 # If you opt for an HTML output, you get all the script outputs arranged
70 # as a single Html history item - all output files are linked, thumbnails for all the pdfs.
71 # Ugly but really inexpensive.
72 #
73 # Patches appreciated please.
74 #
75 #
76 # long route to June 2012 product
77 # Behold the awesome power of Galaxy and the toolshed with the tool factory to bind them
78 # derived from an integrated script model
79 # called rgBaseScriptWrapper.py
80 # Note to the unwary:
81 # This tool allows arbitrary scripting on your Galaxy as the Galaxy user
82 # There is nothing stopping a malicious user doing whatever they choose
83 # Extremely dangerous!!
84 # Totally insecure. So, trusted users only
85 #
86 # preferred model is a developer using their throw away workstation instance - ie a private site.
87 # no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
88 #
89
90 import sys
91 import shutil
92 import subprocess
93 import os
94 import time
95 import tempfile
96 import optparse
97 import tarfile
98 import re
99 import shutil
100 import math
101
# Basename of the running script; it is written into the generator meta tag of HTML reports.
progname = os.path.split(sys.argv[0])[1]
myversion = 'V001.1 March 2014'
verbose = False
debug = False
toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'

# if we do html we need these dependencies specified in a tool_dependencies.xml file and referred to in the generated
# tool xml
toolhtmldepskel = """<?xml version="1.0"?>
<tool_dependency>
<package name="ghostscript" version="9.10">
<repository name="package_ghostscript_9_10" owner="devteam" prior_installation_required="True" />
</package>
<package name="graphicsmagick" version="1.3.18">
<repository name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" />
</package>
<readme>
%s
</readme>
</tool_dependency>
"""

# Skeleton without any packages, used when the generated tool should not declare
# toolshed dependencies. It needs its own name: makeTooltar refers to it as
# emptytoolhtmldepskel, and a second assignment to toolhtmldepskel would throw
# away the skeleton above.
emptytoolhtmldepskel = """<?xml version="1.0"?>
<tool_dependency>
<readme>
%s
</readme>
</tool_dependency>
"""

# <requirements> element inserted into the generated tool xml when dependencies are included
protorequirements = """<requirements>
<requirement type="package" version="9.10">ghostscript</requirement>
<requirement type="package" version="1.3.18">graphicsmagick</requirement>
</requirements>"""
136
def timenow():
    """Return the current local time as a 'dd/mm/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    local_now = time.localtime(time.time())
    return time.strftime(stamp_format, local_now)
141
# Replacement text for the characters that must not appear verbatim when a
# script or help text is embedded in the generated tool xml.
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",
}


def html_escape(text):
    """Return text with every character listed in html_escape_table replaced.

    Characters without an entry in the table are copied through unchanged.
    """
    pieces = []
    for char in text:
        pieces.append(html_escape_table.get(char, char))
    return "".join(pieces)
152
def cmd_exists(cmd):
    """Return True when the shell can resolve cmd as a command.

    The check runs the shell builtin ``type``, which is why shell=True is
    needed. cmd is pasted into the shell command line unquoted, so only pass
    trusted, literal command names.
    """
    # subprocess.call waits for the child without reading its pipes, so
    # PIPE targets can block; discard the output instead.
    with open(os.devnull, 'w') as devnull:
        return subprocess.call("type " + cmd, shell=True,
                               stdout=devnull, stderr=devnull) == 0
156
def parse_citations(citations_text):
    """Split a citations blob into (type, content) tuples.

    Entries are separated by the marker ``**ENTRY**``; blank entries are
    ignored. An entry beginning with ``doi`` yields ("doi", rest) and any
    other entry yields ("bibtex", rest), with surrounding whitespace removed
    from the content.

    Returns a list of 2-tuples in the order the entries appear.
    """
    citation_tuples = []
    for raw_entry in citations_text.split("**ENTRY**"):
        # strip first so leading newlines or spaces cannot hide the type tag
        entry = raw_entry.strip()
        if not entry:
            continue
        if entry.startswith("doi"):
            citation_tuples.append(("doi", entry[len("doi"):].strip()))
        elif entry.startswith("bibtex"):
            citation_tuples.append(("bibtex", entry[len("bibtex"):].strip()))
        else:
            # untagged entry: keep all of it rather than cutting off its first characters
            citation_tuples.append(("bibtex", entry))
    return citation_tuples
168
169
170 class ScriptRunner:
171 """class is a wrapper for an arbitrary script
172 """
173
174 def __init__(self,opts=None,treatbashSpecial=True):
175 """
176 cleanup inputs, setup some outputs
177
178 """
179 self.useGM = cmd_exists('gm')
180 self.useIM = cmd_exists('convert')
181 self.useGS = cmd_exists('gs')
182 self.temp_warned = False # we want only one warning if $TMP not set
183 self.treatbashSpecial = treatbashSpecial
184 if opts.output_dir: # simplify for the tool tarball
185 os.chdir(opts.output_dir)
186 self.thumbformat = 'png'
187 self.opts = opts
188 self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but..
189 self.toolid = self.toolname
190 self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
191 self.pyfile = self.myname # crude but efficient - the cruft won't hurt much
192 self.xmlfile = '%s.xml' % self.toolname
193 s = open(self.opts.script_path,'r').readlines()
194 s = [x.rstrip() for x in s] # remove pesky dos line endings if needed
195 self.script = '\n'.join(s)
196 fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname,suffix=".%s" % (opts.interpreter))
197 tscript = open(self.sfile,'w') # use self.sfile as script source for Popen
198 tscript.write(self.script)
199 tscript.close()
200 self.indentedScript = '\n'.join([' %s' % html_escape(x) for x in s]) # for restructured text in help
201 self.escapedScript = '\n'.join([html_escape(x) for x in s])
202 self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.toolname)
203 if opts.output_dir: # may not want these complexities
204 self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.toolname)
205 art = '%s.%s' % (self.toolname,opts.interpreter)
206 artpath = os.path.join(self.opts.output_dir,art) # need full path
207 artifact = open(artpath,'w') # use self.sfile as script source for Popen
208 artifact.write(self.script)
209 artifact.close()
210 self.cl = []
211 self.html = []
212 self.test1Inputs = [] # now a list
213 a = self.cl.append
214 a(opts.interpreter)
215 if self.treatbashSpecial and opts.interpreter in ['bash','sh']:
216 a(self.sfile)
217 else:
218 a('-') # stdin
219 # if multiple inputs - positional or need to distinguish them with cl params
220 if opts.input_tab:
221 tests = []
222 for i,intab in enumerate(opts.input_tab): # if multiple, make tests
223 if intab.find(',') <> -1:
224 (gpath,uname) = intab.split(',')
225 else:
226 gpath = uname = intab
227 tests.append(os.path.basename(gpath))
228 self.test1Inputs = '<param name="input_tab" value="%s" />' % (','.join(tests))
229 else:
230 self.test1Inputs = ''
231 # we always pass path,name pairs in using python optparse append
232 # but the command line has to be different
233 self.infile_paths = ','.join([x.split(',')[0] for x in self.opts.input_tab])
234 self.infile_names = ','.join([x.split(',')[1] for x in self.opts.input_tab])
235 if self.opts.interpreter == 'python':
236 # yes, this is how additional parameters are always passed in python - to the TF itself and to
237 # scripts to avoid having unknown parameter names (yes, they can be parsed but...) on the command line
238 a('--INPATHS "%s"' % (self.infile_paths))
239 a('--INNAMES "%s"' % (self.infile_names))
240 if self.opts.output_tab:
241 a('--OUTPATH "%s"' % self.opts.output_tab)
242 for p in opts.additional_parameters:
243 p = p.replace('"','')
244 psplit=p.split(',')
245 param = psplit[0]
246 value = psplit[1]
247 a('--additional_parameters "%s,%s"' % (param,value))
248 if (self.opts.interpreter == 'Rscript'):
249 # pass params on command line
250 a('INPATHS "%s"' % self.infile_paths)
251 a('INNAMES "%s"' % self.infile_names)
252 if self.opts.output_tab:
253 a('OUTPATH "%s"' % self.opts.output_tab)
254 for param in opts.additional_parameters:
255 param, value=param.split(',')
256 a('%s="%s"' % (param,value))
257 if (self.opts.interpreter == 'perl'):
258 # pass params on command line
259 a('%s' % self.infile_paths)
260 a('%s' % self.infile_names)
261 if self.opts.output_tab:
262 a('%s' % self.opts.output_tab)
263 for param in opts.additional_parameters:
264 param, value=param.split(',')
265 if (value.find(' ') <> -1):
266 a('%s="%s"' % (param,value))
267 else:
268 a('%s=%s' % (param,value))
269
270 if self.opts.interpreter == 'sh' or self.opts.interpreter == 'bash':
271 # more is better - now move all params into environment AND drop on to command line.
272 self.cl.insert(0,'env')
273 self.cl.insert(1,'INPATHS=%s' % (self.infile_paths))
274 self.cl.insert(2,'INNAMES=%s' % (self.infile_names))
275 if self.opts.output_tab:
276 self.cl.insert(3,'OUTPATH=%s' % (self.opts.output_tab))
277 a('OUTPATH=%s' % (self.opts.output_tab))
278 # sets those environment variables for the script
279 # additional params appear in CL - yes, it's confusing
280 for i,param in enumerate(opts.additional_parameters):
281 psplit = param.split(',')
282 n = psplit[0]
283 v = psplit[1]
284 if (v.find(' ') <> -1):
285 a('%s="%s"' % (n,v))
286 self.cl.insert(4+i,'%s="%s"' % (n,v))
287 else:
288 a('%s=%s' % (n,v))
289 self.cl.insert(4+i,'%s=%s' % (n,v))
290
291
292 self.outFormats = opts.output_format
293 self.inputFormats = opts.input_formats
294 self.test1Output = '%s_test1_output.xls' % self.toolname
295 self.test1HTML = '%s_test1_output.html' % self.toolname
296
297 def makeXML(self):
298 """
299 Create a Galaxy xml tool wrapper for the new script as a string to write out
300 fixme - use templating or something less fugly than this example of what we produce
301
302 <tool id="reverse" name="reverse" version="0.01">
303 <description>a tabular file</description>
304 <command interpreter="python">
305 reverse.py --script_path "$runMe" --interpreter "python"
306 --tool_name "reverse" --input_tab "$input1" --output_tab "$tab_file"
307 </command>
308 <inputs>
309 <param name="input1" type="data" format="tabular" label="Select one or more input files from your history"/>
310 <param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="reverse"/>
311 </inputs>
312 <outputs>
313 <data format="tabular" name="tab_file" label="${job_name}"/>
314
315 </outputs>
316 <help>
317
318 **What it Does**
319
320 Reverse the columns in a tabular file
321
322 </help>
323 <configfiles>
324 <configfile name="runMe">
325
326 # reverse order of columns in a tabular file
327 import sys
328 inp = sys.argv[1]
329 outp = sys.argv[2]
330 i = open(inp,'r')
331 o = open(outp,'w')
332 for row in i:
333 rs = row.rstrip().split('\t')
334 rs.reverse()
335 o.write('\t'.join(rs))
336 o.write('\n')
337 i.close()
338 o.close()
339
340
341 </configfile>
342 </configfiles>
343 </tool>
344
345 """
346 newXML="""<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s">
347 %(tooldesc)s
348 %(requirements)s
349 <command interpreter="python">
350 %(command)s
351 </command>
352 <inputs>
353 %(inputs)s
354 %(additionalInputs)s
355 </inputs>
356 <outputs>
357 %(outputs)s
358 </outputs>
359 <configfiles>
360 <configfile name="runMe">
361 %(script)s
362 </configfile>
363 </configfiles>
364 <tests>
365 %(tooltests)s
366 </tests>
367 <help>
368
369 %(help)s
370
371 </help>
372 <citations>
373 %(citations)s
374 <citation type="doi">10.1093/bioinformatics/bts573</citation>
375 </citations>
376 </tool>""" # needs a dict with toolname, toolid, interpreter, scriptname, command, inputs as a multi line string ready to write, outputs ditto, help ditto
377
378 newCommand="""
379 %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s"
380 --tool_name "%(toolname)s"
381 %(command_inputs)s
382 %(command_outputs)s
383 """
384 # may NOT be an input or htmlout - appended later
385 tooltestsTabOnly = """
386 <test>
387 %(test1Inputs)s
388 <param name="job_name" value="test1"/>
389 <param name="runMe" value="$runMe"/>
390 <output name="output1="%(test1Output)s" ftype="tabular"/>
391 %(additionalParams)s
392 </test>
393 """
394 tooltestsHTMLOnly = """
395 <test>
396 %(test1Inputs)s
397 <param name="job_name" value="test1"/>
398 <param name="runMe" value="$runMe"/>
399 %(additionalParams)s
400 <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="5"/>
401 </test>
402 """
403 tooltestsBoth = """
404 <test>
405 %(test1Inputs)s
406 <param name="job_name" value="test1"/>
407 <param name="runMe" value="$runMe"/>
408 %(additionalParams)s
409 <output name="output1" file="%(test1Output)s" ftype="tabular" />
410 <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="10"/>
411 </test>
412 """
413 xdict = {}
414 xdict['additionalParams'] = ''
415 xdict['additionalInputs'] = ''
416 if self.opts.additional_parameters:
417 if self.opts.edit_additional_parameters: # add to new tool form with default value set to original value
418 xdict['additionalInputs'] = '\n'.join(['<param name="%s" value="%s" label="%s" help="%s" type="%s"/>' % (x.split(',')[0],x.split(',')[1],x.split(',')[2],
419 x.split(',')[3], x.split(',')[4]) for x in self.opts.additional_parameters])
420 xdict['additionalParams'] = '\n'.join(['<param name="%s" value="%s" />' % (x.split(',')[0],x.split(',')[1]) for x in self.opts.additional_parameters])
421 xdict['requirements'] = ''
422 if self.opts.make_HTML:
423 if self.opts.include_dependencies == "yes":
424 xdict['requirements'] = protorequirements
425 xdict['tool_version'] = self.opts.tool_version
426 xdict['test1HTML'] = self.test1HTML
427 xdict['test1Output'] = self.test1Output
428 xdict['test1Inputs'] = self.test1Inputs
429 if self.opts.make_HTML and self.opts.output_tab:
430 xdict['tooltests'] = tooltestsBoth % xdict
431 elif self.opts.make_HTML:
432 xdict['tooltests'] = tooltestsHTMLOnly % xdict
433 else:
434 xdict['tooltests'] = tooltestsTabOnly % xdict
435 xdict['script'] = self.escapedScript
436 # configfile is least painful way to embed script to avoid external dependencies
437 # but requires escaping of <, > and $ to avoid Mako parsing
438 if self.opts.help_text:
439 helptext = open(self.opts.help_text,'r').readlines()
440 helptext = [html_escape(x) for x in helptext] # must html escape here too - thanks to Marius van den Beek
441 xdict['help'] = ''.join([x for x in helptext])
442 else:
443 xdict['help'] = 'Please ask the tool author (%s) for help as none was supplied at tool generation\n' % (self.opts.user_email)
444 coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::']
445 coda.append('\n')
446 coda.append(self.indentedScript)
447 coda.append('\n**Attribution**\nThis Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.\n' % (self.opts.user_email,timenow()))
448 coda.append('See %s for details of that project' % (toolFactoryURL))
449 coda.append('Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team. ')
450 coda.append('Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573\n')
451 xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda))
452 if self.opts.tool_desc:
453 xdict['tooldesc'] = '<description>%s</description>' % self.opts.tool_desc
454 else:
455 xdict['tooldesc'] = ''
456 xdict['command_outputs'] = ''
457 xdict['outputs'] = ''
458 if self.opts.input_tab <> 'None':
459 cins = ['\n',]
460 cins.append('#for intab in $input1:')
461 cins.append('--input_tab "$intab,$intab.name"')
462 cins.append('#end for\n')
463 xdict['command_inputs'] = '\n'.join(cins)
464 xdict['inputs'] = '''<param name="input_tab" multiple="true" type="data" format="%s" label="Select one or more %s input files from your history"
465 help="Multiple inputs may be selected assuming the script can deal with them..."/> \n''' % (self.inputFormats,self.inputFormats)
466 else:
467 xdict['command_inputs'] = '' # assume no input - eg a random data generator
468 xdict['inputs'] = ''
469 if (len(self.opts.additional_parameters) > 0):
470 cins = ['\n',]
471 for params in self.opts.additional_parameters:
472 if self.opts.edit_additional_parameters:
473 psplit = params.split(',') # name,value...
474 psplit[1] = '$%s' % psplit[0] # replace with form value
475 cins.append('--additional_parameters "%s"' % ','.join(psplit))
476 else:
477 cins.append('--additional_parameters "%s"' % params)
478 xdict['command_inputs'] = '%s\n%s' % (xdict['command_inputs'],'\n'.join(cins))
479 xdict['inputs'] += '<param name="job_name" type="text" size="60" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname
480 xdict['toolname'] = self.toolname
481 xdict['toolid'] = self.toolid
482 xdict['interpreter'] = self.opts.interpreter
483 xdict['scriptname'] = self.sfile
484 if self.opts.make_HTML:
485 xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"'
486 xdict['outputs'] += ' <data format="html" name="html_file" label="${job_name}.html"/>\n'
487 else:
488 xdict['command_outputs'] += ' --output_dir "./"'
489 if self.opts.output_tab:
490 xdict['command_outputs'] += ' --output_tab "$tab_file"'
491 xdict['outputs'] += ' <data format="%s" name="output1" label="${job_name}"/>\n' % self.outFormats
492 xdict['command'] = newCommand % xdict
493 if self.opts.citations:
494 citationstext = open(self.opts.citations,'r').read()
495 citation_tuples = parse_citations(citationstext)
496 citations_xml = ""
497 for citation_type, citation_content in citation_tuples:
498 citation_xml = """<citation type="%s">%s</citation>""" % (citation_type, html_escape(citation_content))
499 citations_xml += citation_xml
500 xdict['citations'] = citations_xml
501 else:
502 xdict['citations'] = ""
503 xmls = newXML % xdict
504 xf = open(self.xmlfile,'w')
505 xf.write(xmls)
506 xf.write('\n')
507 xf.close()
508 # ready for the tarball
509
510
511 def makeTooltar(self):
512 """
513 a tool is a gz tarball with eg
514 /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...
515 """
516 retval = self.run()
517 if retval:
518 print >> sys.stderr,'## Run failed. Cannot build yet. Please fix and retry'
519 sys.exit(1)
520 tdir = self.toolname
521 os.mkdir(tdir)
522 self.makeXML()
523 if self.opts.make_HTML:
524 if self.opts.help_text:
525 hlp = open(self.opts.help_text,'r').read()
526 else:
527 hlp = 'Please ask the tool author for help as none was supplied at tool generation\n'
528 if self.opts.include_dependencies == "yes":
529 tooldepcontent = toolhtmldepskel % hlp
530 else:
531 tooldepcontent = emptytoolhtmldepskel % hlp
532 depf = open(os.path.join(tdir,'tool_dependencies.xml'),'w')
533 depf.write(tooldepcontent)
534 depf.write('\n')
535 depf.close()
536 if self.opts.input_tab <> 'None': # no reproducible test otherwise? TODO: maybe..
537 testdir = os.path.join(tdir,'test-data')
538 os.mkdir(testdir) # make tests directory
539 for i,intab in enumerate(self.opts.input_tab):
540 si = self.opts.input_tab[i]
541 if si.find(',') <> -1:
542 s = si.split(',')[0]
543 si = s
544 dest = os.path.join(testdir,os.path.basename(si))
545 if si <> dest:
546 shutil.copyfile(si,dest)
547 if self.opts.output_tab <> None:
548 shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output))
549 if self.opts.make_HTML:
550 shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML))
551 if self.opts.output_dir:
552 shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log'))
553 outpif = '%s.py' % self.toolname # new name
554 outpiname = os.path.join(tdir,outpif) # path for the tool tarball
555 pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM)
556 notes = ['# %s - a self annotated version of %s generated by running %s\n' % (outpiname,pyin,pyin),]
557 notes.append('# to make a new Galaxy tool called %s\n' % self.toolname)
558 notes.append('# User %s at %s\n' % (self.opts.user_email,timenow()))
559 pi = open(self.pyfile,'r').readlines() # our code becomes new tool wrapper (!) - first Galaxy worm
560 notes += pi
561 outpi = open(outpiname,'w')
562 outpi.write(''.join(notes))
563 outpi.write('\n')
564 outpi.close()
565 stname = os.path.join(tdir,self.sfile)
566 if not os.path.exists(stname):
567 shutil.copyfile(self.sfile, stname)
568 xtname = os.path.join(tdir,self.xmlfile)
569 if not os.path.exists(xtname):
570 shutil.copyfile(self.xmlfile,xtname)
571 tarpath = "%s.tar.gz" % self.toolname
572 tar = tarfile.open(tarpath, "w:gz")
573 tar.add(tdir,arcname='%s' % self.toolname)
574 tar.close()
575 shutil.copyfile(tarpath,self.opts.new_tool)
576 shutil.rmtree(tdir)
577 ## TODO: replace with optional direct upload to local toolshed?
578 return retval
579
580
581 def compressPDF(self,inpdf=None,thumbformat='png'):
582 """need absolute path to pdf
583 note that GS gets confoozled if no $TMP or $TEMP
584 so we set it
585 """
586 assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName)
587 hlog = os.path.join(self.opts.output_dir,"compress_%s.txt" % os.path.basename(inpdf))
588 sto = open(hlog,'a')
589 our_env = os.environ.copy()
590 our_tmp = our_env.get('TMP',None)
591 if not our_tmp:
592 our_tmp = our_env.get('TEMP',None)
593 if not (our_tmp and os.path.exists(our_tmp)):
594 newtmp = os.path.join(self.opts.output_dir,'tmp')
595 try:
596 os.mkdir(newtmp)
597 except:
598 sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
599 our_env['TEMP'] = newtmp
600 if not self.temp_warned:
601 sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
602 self.temp_warned = True
603 outpdf = '%s_compressed' % inpdf
604 cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH","-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf,inpdf]
605 x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
606 retval1 = x.wait()
607 sto.close()
608 if retval1 == 0:
609 os.unlink(inpdf)
610 shutil.move(outpdf,inpdf)
611 os.unlink(hlog)
612 hlog = os.path.join(self.opts.output_dir,"thumbnail_%s.txt" % os.path.basename(inpdf))
613 sto = open(hlog,'w')
614 outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
615 if self.useGM:
616 cl2 = ['gm', 'convert', inpdf, outpng]
617 else: # assume imagemagick
618 cl2 = ['convert', inpdf, outpng]
619 x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
620 retval2 = x.wait()
621 sto.close()
622 if retval2 == 0:
623 os.unlink(hlog)
624 retval = retval1 or retval2
625 return retval
626
627
628 def getfSize(self,fpath,outpath):
629 """
630 format a nice file size string
631 """
632 size = ''
633 fp = os.path.join(outpath,fpath)
634 if os.path.isfile(fp):
635 size = '0 B'
636 n = float(os.path.getsize(fp))
637 if n > 2**20:
638 size = '%1.1f MB' % (n/2**20)
639 elif n > 2**10:
640 size = '%1.1f KB' % (n/2**10)
641 elif n > 0:
642 size = '%d B' % (int(n))
643 return size
644
645 def makeHtml(self):
646 """ Create an HTML file content to list all the artifacts found in the output_dir
647 """
648
649 galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
650 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
651 <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
652 <meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
653 <title></title>
654 <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
655 </head>
656 <body>
657 <div class="toolFormBody">
658 """
659 galhtmlattr = """<hr/><div class="infomessage">This tool (%s) was generated by the <a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory</a></div><br/>"""
660 galhtmlpostfix = """</div></body></html>\n"""
661
662 flist = os.listdir(self.opts.output_dir)
663 flist = [x for x in flist if x <> 'Rplots.pdf']
664 flist.sort()
665 html = []
666 html.append(galhtmlprefix % progname)
667 html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.toolname,timenow()))
668 fhtml = []
669 if len(flist) > 0:
670 logfiles = [x for x in flist if x.lower().endswith('.log')] # log file names determine sections
671 logfiles.sort()
672 logfiles = [x for x in logfiles if os.path.abspath(x) <> os.path.abspath(self.tlog)]
673 logfiles.append(os.path.abspath(self.tlog)) # make it the last one
674 pdflist = []
675 npdf = len([x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'])
676 for rownum,fname in enumerate(flist):
677 dname,e = os.path.splitext(fname)
678 sfsize = self.getfSize(fname,self.opts.output_dir)
679 if e.lower() == '.pdf' : # compress and make a thumbnail
680 thumb = '%s.%s' % (dname,self.thumbformat)
681 pdff = os.path.join(self.opts.output_dir,fname)
682 retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
683 if retval == 0:
684 pdflist.append((fname,thumb))
685 else:
686 pdflist.append((fname,fname))
687 if (rownum+1) % 2 == 0:
688 fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
689 else:
690 fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
691 for logfname in logfiles: # expect at least tlog - if more
692 if os.path.abspath(logfname) == os.path.abspath(self.tlog): # handled later
693 sectionname = 'All tool run'
694 if (len(logfiles) > 1):
695 sectionname = 'Other'
696 ourpdfs = pdflist
697 else:
698 realname = os.path.basename(logfname)
699 sectionname = os.path.splitext(realname)[0].split('_')[0] # break in case _ added to log
700 ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
701 pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] <> sectionname] # remove
702 nacross = 1
703 npdf = len(ourpdfs)
704
705 if npdf > 0:
706 nacross = math.sqrt(npdf) ## int(round(math.log(npdf,2)))
707 if int(nacross)**2 != npdf:
708 nacross += 1
709 nacross = int(nacross)
710 width = min(400,int(1200/nacross))
711 html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname)
712 html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>')
713 ntogo = nacross # counter for table row padding with empty cells
714 html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
715 for i,paths in enumerate(ourpdfs):
716 fname,thumb = paths
717 s= """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"
718 alt="Image called %s"/></a></td>\n""" % (fname,thumb,fname,width,fname)
719 if ((i+1) % nacross == 0):
720 s += '</tr>\n'
721 ntogo = 0
722 if i < (npdf - 1): # more to come
723 s += '<tr>'
724 ntogo = nacross
725 else:
726 ntogo -= 1
727 html.append(s)
728 if html[-1].strip().endswith('</tr>'):
729 html.append('</table></div>\n')
730 else:
731 if ntogo > 0: # pad
732 html.append('<td>&nbsp;</td>'*ntogo)
733 html.append('</tr></table></div>\n')
734 logt = open(logfname,'r').readlines()
735 logtext = [x for x in logt if x.strip() > '']
736 html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
737 if len(logtext) > 1:
738 html.append('\n<pre>\n')
739 html += logtext
740 html.append('\n</pre>\n')
741 else:
742 html.append('%s is empty<br/>' % logfname)
743 if len(fhtml) > 0:
744 fhtml.insert(0,'<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
745 fhtml.append('</table></div><br/>')
746 html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
747 html += fhtml # add all non-pdf files to the end of the display
748 else:
749 html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
750 html.append(galhtmlpostfix)
751 htmlf = file(self.opts.output_html,'w')
752 htmlf.write('\n'.join(html))
753 htmlf.write('\n')
754 htmlf.close()
755 self.html = html
756
757
758 def run(self):
759 """
760 scripts must be small enough not to fill the pipe!
761 """
762 if self.treatbashSpecial and self.opts.interpreter in ['bash','sh']:
763 retval = self.runBash()
764 else:
765 if self.opts.output_dir:
766 ste = open(self.elog,'w')
767 sto = open(self.tlog,'w')
768 sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
769 sto.flush()
770 p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=ste,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
771 else:
772 p = subprocess.Popen(self.cl,shell=False,stdin=subprocess.PIPE)
773 p.stdin.write(self.script)
774 p.stdin.close()
775 retval = p.wait()
776 if self.opts.output_dir:
777 sto.close()
778 ste.close()
779 err = open(self.elog,'r').readlines()
780 if retval <> 0 and err: # problem
781 print >> sys.stderr,err
782 if self.opts.make_HTML:
783 self.makeHtml()
784 return retval
785
786 def runBash(self):
787 """
788 cannot use - for bash so use self.sfile
789 """
790 if self.opts.output_dir:
791 s = '## Toolfactory generated command line = %s\n' % ' '.join(self.cl)
792 sto = open(self.tlog,'w')
793 sto.write(s)
794 sto.flush()
795 p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
796 else:
797 p = subprocess.Popen(self.cl,shell=False)
798 retval = p.wait()
799 if self.opts.output_dir:
800 sto.close()
801 if self.opts.make_HTML:
802 self.makeHtml()
803 return retval
804
805
def main():
    """Parse the command line, then run the script or build a tool from it.

    Exits through the option parser's error handler (non-zero status) when the
    caller is flagged as unauthorised or a required option is missing, and
    exits with the script's return code when that is non-zero. The temporary
    script file written by ScriptRunner is removed before leaving.
    """
    u = """
This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
<command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
</command>
"""
    op = optparse.OptionParser(usage=u)
    a = op.add_option
    a('--script_path',default=None)
    a('--tool_name',default=None)
    a('--interpreter',default=None)
    a('--output_dir',default='./')
    a('--output_html',default=None)
    a('--input_tab',default=[], action="append") # these are "galaxypath,metadataname" pairs
    a("--input_formats",default="tabular")
    a('--output_tab',default=None)
    a('--output_format',default='tabular')
    a('--user_email',default='Unknown')
    a('--bad_user',default=None)
    a('--make_Tool',default=None)
    a('--make_HTML',default=None)
    a('--help_text',default=None)
    a('--tool_desc',default=None)
    a('--new_tool',default=None)
    a('--tool_version',default=None)
    a('--include_dependencies',default="yes")
    a('--citations',default=None)
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    a('--edit_additional_parameters', action="store_true", default=False)
    opts, args = op.parse_args()
    # explicit checks: assert statements vanish under python -O, and the
    # first of these is the authorisation gate
    if opts.bad_user:
        op.error('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user,opts.bad_user))
    if not opts.tool_name:
        op.error('## Tool Factory expects a tool name - eg --tool_name=DESeq')
    if not opts.interpreter:
        op.error('## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
    if not (opts.script_path and os.path.isfile(opts.script_path)):
        op.error('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # an existing directory is fine; anything else is a real problem
            if not os.path.isdir(opts.output_dir):
                raise
    opts.input_tab = [x.replace('"','').replace("'",'') for x in opts.input_tab]
    # remove quotes we need to deal with spaces in CL params
    opts.additional_parameters = [x.replace('"','') for x in opts.additional_parameters]
    r = ScriptRunner(opts)
    try:
        if opts.make_Tool:
            retcode = r.makeTooltar()
        else:
            retcode = r.run()
    finally:
        # remove the temporary script even when the run raises or exits
        if os.path.exists(r.sfile):
            os.unlink(r.sfile)
    if retcode:
        sys.exit(retcode) # indicate failure to job runner


if __name__ == "__main__":
    main()
860
861