comparison tool1/tool1.py @ 0:9cc0f9a8912e draft default tip

Uploaded
author ambarishk
date Thu, 21 Apr 2016 06:45:57 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9cc0f9a8912e
1 # tool1/tool1.py - a self annotated version of rgToolFactory2.py generated by running rgToolFactory2.py
2 # to make a new Galaxy tool called tool1
3 # User admin@ngsap.com at 13/04/2016 19:49:50
4 # rgToolFactoryMultIn.py
5 # see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
6 #
7 # copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
8 #
9 # all rights reserved
10 # Licensed under the LGPL
11 # suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
12 #
13 # January 2015
14 # unified all setups by passing the script on the cl rather than via a PIPE - no need for treat_bash_special so removed
15 #
16 # in the process of building a complex tool
17 # added ability to choose one of the current toolshed package_r or package_perl or package_python dependencies and source that package
18 # add that package to tool_dependencies
19 # Note that once the generated tool is loaded, it will have that package's env.sh loaded automagically so there is no
20 # --envshpath in the parameters for the generated tool and it uses the system one which will be first on the adjusted path.
21 #
22 # sept 2014 added additional params from
23 # https://bitbucket.org/mvdbeek/dockertoolfactory/src/d4863bcf7b521532c7e8c61b6333840ba5393f73/DockerToolFactory.py?at=default
24 # passing them is complex
25 # and they are restricted to NOT contain commas or double quotes to ensure that they can be safely passed together on
26 # the toolfactory command line as a comma delimited double quoted string for parsing and passing to the script
27 # see examples on this tool form
28
29 # august 2014
30
31 # Allows arbitrary number of input files
32 # NOTE positional parameters are now passed to script
33 # and output (may be "None") is *before* arbitrary number of inputs
34 #
35 # march 2014
36 # had to remove dependencies because cross toolshed dependencies are not possible - can't pre-specify a toolshed url for graphicsmagick and ghostscript
37 # grrrrr - night before a demo
38 # added dependencies to a tool_dependencies.xml if html page generated so generated tool is properly portable
39 #
40 # added ghostscript and graphicsmagick as dependencies
41 # fixed a weird problem where gs was trying to use the new_files_path from universe (database/tmp) as ./database/tmp
42 # errors ensued
43 #
44 # august 2013
45 # found a problem with GS if $TMP or $TEMP missing - now inject /tmp and warn
46 #
47 # july 2013
48 # added ability to combine images and individual log files into html output
49 # just make sure there's a log file foo.log and it will be output
50 # together with all images named like "foo_*.pdf"
51 # otherwise old format for html
52 #
53 # January 2013
54 # problem pointed out by Carlos Borroto
55 # added escaping for <>$ - thought I did that ages ago...
56 #
57 # August 11 2012
58 # changed to use shell=False and cl as a sequence
59
60 # This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
61 # It also serves as the wrapper for the new tool.
62 #
63 # you paste and run your script
64 # Only works for simple scripts that read one input from the history.
65 # Optionally can write one new history dataset,
66 # and optionally collect any number of outputs into links on an autogenerated HTML page.
67
68 # DO NOT install on a public or important site - please.
69
70 # installed generated tools are fine if the script is safe.
71 # They just run normally and their user cannot do anything unusually insecure
72 # but please, practice safe toolshed.
73 # Read the fucking code before you install any tool
74 # especially this one
75
76 # After you get the script working on some test data, you can
77 # optionally generate a toolshed compatible gzip file
78 # containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
79 # safe and largely automated installation in a production Galaxy.
80
81 # If you opt for an HTML output, you get all the script outputs arranged
82 # as a single Html history item - all output files are linked, thumbnails for all the pdfs.
83 # Ugly but really inexpensive.
84 #
85 # Patches appreciated please.
86 #
87 #
88 # long route to June 2012 product
89 # Behold the awesome power of Galaxy and the toolshed with the tool factory to bind them
90 # derived from an integrated script model
91 # called rgBaseScriptWrapper.py
92 # Note to the unwary:
93 # This tool allows arbitrary scripting on your Galaxy as the Galaxy user
94 # There is nothing stopping a malicious user doing whatever they choose
95 # Extremely dangerous!!
96 # Totally insecure. So, trusted users only
97 #
98 # preferred model is a developer using their throw away workstation instance - ie a private site.
99 # no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
100 #
101
102 import sys
103 import shutil
104 import subprocess
105 import os
106 import time
107 import tempfile
108 import optparse
109 import tarfile
110 import re
111 import shutil
112 import math
113
# Base name of the running script; used in the header of the generated HTML report.
progname = os.path.basename(sys.argv[0])
# Version label of this wrapper.
myversion = 'V001.1 March 2014'
# Module-wide switches, both off by default.
verbose = False
debug = False
# Project home page quoted in the help text of generated tools.
toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'
119
120 # if we do html we need these dependencies specified in a tool_dependencies.xml file and referred to in the generated
121 # tool xml
122
def timenow():
    """Return the current local time as a 'dd/mm/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    local_now = time.localtime(time.time())
    return time.strftime(stamp_format, local_now)
127
def quote_non_numeric(s):
    """Return s untouched when float() accepts it, otherwise wrapped in double quotes.

    Used when building name=value command line arguments for scripts so that
    numeric values stay bare and everything else arrives pre-quoted.
    """
    try:
        float(s)  # only the success/failure of the conversion matters
    except ValueError:
        return '"%s"' % s
    return s
137
# Per-character replacements applied by html_escape. The XML special
# characters become entities; '$' is prefixed with a backslash.
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",  # explicit backslash: "\$" is an invalid escape sequence
}


def html_escape(text):
    """Return text with every character listed in html_escape_table replaced.

    Characters that are not in the table are copied through unchanged.
    """
    return "".join(html_escape_table.get(c, c) for c in text)
148
149
def html_unescape(text):
    """Revert the replacements '&amp;', '&gt;', '&lt;' and backslash-dollar.

    '&amp;' is decoded last so that text which was escaped once is decoded
    exactly once: '&amp;gt;' becomes '&gt;' rather than being decoded a
    second time into '>'.
    """
    replacements = (
        ('&gt;', '>'),
        ('&lt;', '<'),
        ('\\$', '$'),
        ('&amp;', '&'),  # must stay last - see docstring
    )
    for entity, char in replacements:
        text = text.replace(entity, char)
    return text
154
def cmd_exists(cmd):
    """Return True when the shell builtin ``type`` exits with status 0 for cmd.

    cmd is concatenated, unquoted, into a command line that is run through
    the shell; the output of ``type`` is captured and discarded.
    """
    probe = "type " + cmd
    status = subprocess.call(probe, shell=True,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return status == 0
158
def parse_citations(citations_text):
    """Split a citations blob into a list of (type, content) tuples.

    Entries are separated by the literal marker ``**ENTRY**``; entries that
    are empty or whitespace-only are ignored. Each entry is stripped before
    its type is inspected, so leading newlines or spaces after the marker do
    not cause a misclassification. An entry starting with ``doi`` yields
    ``("doi", rest)``. Anything else yields ``("bibtex", rest)``, where a
    leading ``bibtex`` keyword is removed only if it is actually present.
    """
    citation_tuples = []
    for raw_entry in citations_text.split("**ENTRY**"):
        entry = raw_entry.strip()
        if not entry:
            continue
        if entry.startswith("doi"):
            citation_tuples.append(("doi", entry[len("doi"):].strip()))
        else:
            if entry.startswith("bibtex"):
                entry = entry[len("bibtex"):]
            citation_tuples.append(("bibtex", entry.strip()))
    return citation_tuples
170
def shell_source(script):
    """Source a shell script and copy the resulting environment into os.environ.

    Needed to source a Galaxy tool interpreter env.sh so that the right
    dependency package is found. Based on the idea in
    http://pythonwise.blogspot.fr/2010/04/sourcing-shell-script.html

    The environment is dumped with ``env -0`` so entries are separated by NUL
    characters and values containing newlines survive. Each entry is split at
    its first '=' only, so values that themselves contain '=' are kept.

    script is interpolated, unquoted, into a shell command line.
    """
    pipe = subprocess.Popen("env -i ; . %s ; env -0" % script, stdout=subprocess.PIPE, shell=True)
    output = pipe.communicate()[0]
    if not isinstance(output, str):
        # Python 3 returns bytes from the pipe; Python 2 already returns str
        output = output.decode('utf-8', 'replace')
    newenv = {}
    for entry in output.split('\0'):
        name, sep, value = entry.partition('=')
        if sep and name:  # skip fragments that are not NAME=value
            newenv[name] = value
    os.environ.update(newenv)
181
182 class ScriptRunner:
183 """class is a wrapper for an arbitrary script
184 note funky templating. this should all be done proper.
185 Problem is, this kludge developed quite naturally and seems to work ok with
186 little overhead...
187
188 """
189
190
191 def __init__(self,opts=None):
192 """
193 cleanup inputs, setup some outputs
194
195 """
196
197 self.toolhtmldepinterpskel = """<?xml version="1.0"?>
198 <tool_dependency>
199 <package name="ghostscript" version="9.10">
200 <repository name="package_ghostscript_9_10" owner="devteam" prior_installation_required="True" />
201 </package>
202 <package name="graphicsmagick" version="1.3.18">
203 <repository name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" />
204 </package>
205 <package name="%(interpreter_name)s" version="%(interpreter_version)s">
206 <repository name="%(interpreter_pack)s" owner="%(interpreter_owner)s" prior_installation_required="True" />
207 </package>
208
209 <readme>
210 %(readme)s
211 This file was autogenerated by the Galaxy Tool Factory 2
212 </readme>
213 </tool_dependency>
214 """
215
216 self.toolhtmldepskel = """<?xml version="1.0"?>
217 <tool_dependency>
218 <package name="ghostscript" version="9.10">
219 <repository name="package_ghostscript_9_10" owner="devteam" prior_installation_required="True" />
220 </package>
221 <package name="graphicsmagick" version="1.3.18">
222 <repository name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" />
223 </package>
224 <readme>
225 %(readme)s
226 This file was autogenerated by the Galaxy Tool Factory 2
227 </readme>
228 </tool_dependency>
229 """
230
231 self.emptytoolhtmldepskel = """<?xml version="1.0"?>
232 <tool_dependency>
233 <readme>
234 %(readme)s
235 This file was autogenerated by the Galaxy Tool Factory 2
236 </readme>
237 </tool_dependency>
238 """
239
240 self.protorequirements = """<requirements>
241 <requirement type="package" version="9.10">ghostscript</requirement>
242 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
243 </requirements>"""
244
245 self.protorequirements_interpreter = """<requirements>
246 <requirement type="package" version="9.10">ghostscript</requirement>
247 <requirement type="package" version="1.3.18">graphicsmagick</requirement>
248 <requirement type="package" version="%(interpreter_version)s">%(interpreter_name)s</requirement>
249 </requirements>"""
250
251
252 self.newCommand="""
253 %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s"
254 --tool_name "%(toolname)s"
255 %(command_inputs)s
256 %(command_outputs)s
257 """
258
259 self.tooltestsTabOnly = """
260 <test>
261 %(test1Inputs)s
262 <param name="job_name" value="test1"/>
263 <param name="runMe" value="$runMe"/>
264 <output name="output1="%(test1Output)s" ftype="tabular"/>
265 %(additionalParams)s
266 </test>
267 """
268
269 self.tooltestsHTMLOnly = """
270 <test>
271 %(test1Inputs)s
272 <param name="job_name" value="test1"/>
273 <param name="runMe" value="$runMe"/>
274 %(additionalParams)s
275 <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="5"/>
276 </test>
277 """
278
279 self.tooltestsBoth = """
280 <test>
281 %(test1Inputs)s
282 <param name="job_name" value="test1"/>
283 <param name="runMe" value="$runMe"/>
284 %(additionalParams)s
285 <output name="output1" file="%(test1Output)s" ftype="tabular" />
286 <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="10"/>
287 </test>
288 """
289
290 self.newXML="""<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s">
291 %(tooldesc)s
292 %(requirements)s
293 <command interpreter="python">
294 %(command)s
295 </command>
296 <inputs>
297 %(inputs)s
298 %(additionalInputs)s
299 </inputs>
300 <outputs>
301 %(outputs)s
302 </outputs>
303 <configfiles>
304 <configfile name="runMe">
305 %(script)s
306 </configfile>
307 </configfiles>
308 <tests>
309 %(tooltests)s
310 </tests>
311 <help>
312
313 %(help)s
314
315 This tool was autogenerated from a user provided script using the Galaxy Tool Factory 2
316 https://toolshed.g2.bx.psu.edu/view/fubar/tool_factory_2
317 </help>
318 <citations>
319 %(citations)s
320 <citation type="doi">10.1093/bioinformatics/bts573</citation>
321 </citations>
322 </tool>"""
323
324 self.useGM = cmd_exists('gm')
325 self.useIM = cmd_exists('convert')
326 self.useGS = cmd_exists('gs')
327 self.temp_warned = False # we want only one warning if $TMP not set
328 if opts.output_dir: # simplify for the tool tarball
329 os.chdir(opts.output_dir)
330 self.thumbformat = 'png'
331 self.opts = opts
332 self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but..
333 self.toolid = self.toolname
334 self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
335 self.pyfile = self.myname # crude but efficient - the cruft won't hurt much
336 self.xmlfile = '%s.xml' % self.toolname
337 rx = open(self.opts.script_path,'r').readlines()
338 rx = [x.rstrip() for x in rx] # remove pesky dos line endings if needed
339 self.script = '\n'.join(rx)
340 fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname,suffix=".%s" % (opts.interpreter))
341 tscript = open(self.sfile,'w') # use self.sfile as script source for Popen
342 tscript.write(self.script)
343 tscript.close()
344 self.indentedScript = " %s" % '\n'.join([' %s' % html_escape(x) for x in rx]) # for restructured text in help
345 self.escapedScript = "%s" % '\n'.join([' %s' % html_escape(x) for x in rx])
346 self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.toolname)
347 if opts.output_dir: # may not want these complexities
348 self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.toolname)
349 art = '%s.%s' % (self.toolname,opts.interpreter)
350 artpath = os.path.join(self.opts.output_dir,art) # need full path
351 artifact = open(artpath,'w') # use self.sfile as script source for Popen
352 artifact.write(self.script)
353 artifact.close()
354 self.cl = []
355 self.html = []
356 self.test1Inputs = [] # now a list
357 a = self.cl.append
358 a(opts.interpreter)
359 a(self.sfile)
360 # if multiple inputs - positional or need to distinguish them with cl params
361 if opts.input_tab:
362 tests = []
363 for i,intab in enumerate(opts.input_tab): # if multiple, make tests
364 if intab.find(',') != -1:
365 (gpath,uname) = intab.split(',')
366 else:
367 gpath = uname = intab
368 tests.append(os.path.basename(gpath))
369 self.test1Inputs = '<param name="input_tab" value="%s" />' % (','.join(tests))
370 else:
371 self.test1Inputs = ''
372 # we always pass path,name pairs in using python optparse append
373 # but the command line has to be different
374 self.infile_paths = ''
375 self.infile_names = ''
376 if self.opts.input_tab:
377 self.infile_paths = ','.join([x.split(',')[0].strip() for x in self.opts.input_tab])
378 self.infile_names = ','.join([x.split(',')[1].strip() for x in self.opts.input_tab])
379 if self.opts.interpreter == 'python':
380 # yes, this is how additional parameters are always passed in python - to the TF itself and to
381 # scripts to avoid having unknown parameter names (yes, they can be parsed but...) on the command line
382 if self.opts.input_tab:
383 a('--inpaths=%s' % (self.infile_paths))
384 a('--innames=%s' % (self.infile_names))
385 if self.opts.output_tab:
386 a('--outpath=%s' % self.opts.output_tab)
387 for p in opts.additional_parameters:
388 p = p.replace('"','')
389 psplit = p.split(',')
390 param = html_unescape(psplit[0])
391 value = html_unescape(psplit[1])
392 a('%s="%s"' % (param,value))
393 if (self.opts.interpreter == 'Rscript'):
394 # pass params on command line as expressions which the script evaluates - see sample
395 if self.opts.input_tab:
396 a('INPATHS="%s"' % self.infile_paths)
397 a('INNAMES="%s"' % self.infile_names)
398 if self.opts.output_tab:
399 a('OUTPATH="%s"' % self.opts.output_tab)
400 for p in opts.additional_parameters:
401 p = p.replace('"','')
402 psplit = p.split(',')
403 param = html_unescape(psplit[0])
404 value = html_unescape(psplit[1])
405 a('%s=%s' % (param,quote_non_numeric(value)))
406 if (self.opts.interpreter == 'perl'):
407 # pass positional params on command line - perl script needs to discombobulate the path/name lists
408 if self.opts.input_tab:
409 a('%s' % self.infile_paths)
410 a('%s' % self.infile_names)
411 if self.opts.output_tab:
412 a('%s' % self.opts.output_tab)
413 for p in opts.additional_parameters:
414 # followed by any additional name=value parameter pairs
415 p = p.replace('"','')
416 psplit = p.split(',')
417 param = html_unescape(psplit[0])
418 value = html_unescape(psplit[1])
419 a('%s=%s' % (param,quote_non_numeric(value)))
420 if self.opts.interpreter == 'sh' or self.opts.interpreter == 'bash':
421 # more is better - now move all params into environment AND drop on to command line.
422 self.cl.insert(0,'env')
423 if self.opts.input_tab:
424 self.cl.insert(1,'INPATHS=%s' % (self.infile_paths))
425 self.cl.insert(2,'INNAMES=%s' % (self.infile_names))
426 if self.opts.output_tab:
427 self.cl.insert(3,'OUTPATH=%s' % (self.opts.output_tab))
428 a('OUTPATH=%s' % (self.opts.output_tab))
429 # sets those environment variables for the script
430 # additional params appear in CL - yes, it's confusing
431 for i,p in enumerate(opts.additional_parameters):
432 psplit = p.split(',')
433 param = html_unescape(psplit[0])
434 value = html_unescape(psplit[1])
435 a('%s=%s' % (param,quote_non_numeric(value)))
436 self.cl.insert(4+i,'%s=%s' % (param,quote_non_numeric(value)))
437 self.interpreter_owner = 'SYSTEM'
438 self.interpreter_pack = 'SYSTEM'
439 self.interpreter_name = 'SYSTEM'
440 self.interpreter_version = 'SYSTEM'
441 self.interpreter_revision = 'SYSTEM'
442 if opts.envshpath != 'system': # need to parse out details for our tool_dependency
443 try: # fragile - depends on common naming convention as at jan 2015 = package_[interp]_v0_v1_v2... = version v0.v1.v2.. is in play
444 # this ONLY happens at tool generation by an admin - the generated tool always uses the default of system so path is from local env.sh
445 packdetails = opts.envshpath.split(os.path.sep)[-4:-1] # eg ['fubar', 'package_r_3_1_1', '63cdb9b2234c']
446 self.interpreter_owner = packdetails[0]
447 self.interpreter_pack = packdetails[1]
448 self.interpreter_name = packdetails[1].split('_')[1].upper()
449 self.interpreter_revision = packdetails[2]
450 self.interpreter_version = '.'.join(packdetails[1].split('_')[2:])
451 except:
452 pass
453 self.outFormats = opts.output_format
454 self.inputFormats = opts.input_formats
455 self.test1Output = '%s_test1_output.xls' % self.toolname
456 self.test1HTML = '%s_test1_output.html' % self.toolname
457
def makeXML(self):
    """
    Create a Galaxy xml tool wrapper for the new script and write it to self.xmlfile
    fixme - use templating or something less fugly than this

    The xml is produced by filling self.newXML (and the smaller templates it
    embeds) from a dict built here. Abbreviated example of what we produce:

    <tool id="reverse" name="reverse" version="0.01">
    <description>a tabular file</description>
    <command interpreter="python">
    reverse.py --script_path "$runMe" --interpreter "python"
    --tool_name "reverse" --input_tab "$input1" --output_tab "$output1"
    </command>
    <inputs> ... </inputs>
    <outputs> ... </outputs>
    <help> ... </help>
    <configfiles>
    <configfile name="runMe">
    ... the user supplied script ...
    </configfile>
    </configfiles>
    </tool>

    Each entry of opts.additional_parameters is a comma separated string that
    is indexed here as name,value,label,help,type.
    """

    # these templates need a dict with the right keys to match the parameters - outputs, help, code...

    xdict = {}
    xdict['additionalParams'] = ''
    xdict['additionalInputs'] = ''
    if self.opts.additional_parameters:
        fields = [x.split(',') for x in self.opts.additional_parameters]
        if self.opts.edit_additional_parameters: # add to new tool form with default value set to original value
            xdict['additionalInputs'] = '\n'.join(['<param name="%s" value="%s" label="%s" help="%s" type="%s"/>' %
                (f[0],html_escape(f[1]),html_escape(f[2]),html_escape(f[3]),f[4]) for f in fields])
        xdict['additionalParams'] = '\n'.join(['<param name="%s" value="%s" />' % (f[0],html_escape(f[1])) for f in fields])
    xdict['interpreter_owner'] = self.interpreter_owner
    xdict['interpreter_version'] = self.interpreter_version
    xdict['interpreter_pack'] = self.interpreter_pack
    xdict['interpreter_name'] = self.interpreter_name
    xdict['requirements'] = ''
    if self.opts.include_dependencies == "yes":
        if self.opts.envshpath != 'system':
            xdict['requirements'] = self.protorequirements_interpreter % xdict
        else:
            xdict['requirements'] = self.protorequirements
    xdict['tool_version'] = self.opts.tool_version
    xdict['test1HTML'] = self.test1HTML
    xdict['test1Output'] = self.test1Output
    xdict['test1Inputs'] = self.test1Inputs
    if self.opts.make_HTML and self.opts.output_tab:
        xdict['tooltests'] = self.tooltestsBoth % xdict
    elif self.opts.make_HTML:
        xdict['tooltests'] = self.tooltestsHTMLOnly % xdict
    else:
        xdict['tooltests'] = self.tooltestsTabOnly % xdict
    xdict['script'] = self.escapedScript
    # configfile is least painful way to embed script to avoid external dependencies
    # but requires escaping of <, > and $ to avoid Mako parsing
    if self.opts.help_text:
        with open(self.opts.help_text,'r') as helpfile:
            helptext = helpfile.readlines()
        # must html escape here too - thanks to Marius van den Beek
        xdict['help'] = ''.join([html_escape(x) for x in helptext])
    else:
        xdict['help'] = 'Please ask the tool author (%s) for help as none was supplied at tool generation\n' % (self.opts.user_email)
    coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::']
    coda.append('\n')
    coda.append(self.indentedScript)
    coda.append('\n**Attribution**\nThis Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.\n' % (self.opts.user_email,timenow()))
    coda.append('See %s for details of that project' % (toolFactoryURL))
    coda.append('Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team. ')
    coda.append('Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573\n')
    xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda))
    if self.opts.tool_desc:
        xdict['tooldesc'] = '<description>%s</description>' % self.opts.tool_desc
    else:
        xdict['tooldesc'] = ''
    xdict['command_outputs'] = ''
    xdict['outputs'] = ''
    if self.opts.input_tab:
        cins = ['\n',]
        cins.append('--input_formats %s' % self.opts.input_formats)
        # NOTE(review): the loop iterates $input1 but the input param declared
        # below is named "input_tab" - confirm which name the generated tool needs
        cins.append('#for intab in $input1:')
        cins.append('--input_tab "${intab},${intab.name}"')
        cins.append('#end for\n')
        xdict['command_inputs'] = '\n'.join(cins)
        xdict['inputs'] = ('<param name="input_tab" multiple="true" type="data" format="%s" label="Select one or more %s input files from your history"\n'
                           'help="Multiple inputs may be selected assuming the script can deal with them..."/> \n') % (self.inputFormats,self.inputFormats)
    else:
        xdict['command_inputs'] = '' # assume no input - eg a random data generator
        xdict['inputs'] = ''
    if self.opts.additional_parameters:
        cins = ['\n',]
        for params in self.opts.additional_parameters:
            psplit = params.split(',') # name,value...
            psplit[3] = html_escape(psplit[3])
            if self.opts.edit_additional_parameters:
                psplit[1] = '$%s' % psplit[0] # replace with form value
            else:
                psplit[1] = html_escape(psplit[1]) # leave prespecified value
            cins.append('--additional_parameters """%s"""' % ','.join(psplit))
        xdict['command_inputs'] = '%s\n%s' % (xdict['command_inputs'],'\n'.join(cins))
    xdict['inputs'] += '<param name="job_name" type="text" size="60" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname
    xdict['toolname'] = self.toolname
    xdict['toolid'] = self.toolid
    xdict['interpreter'] = self.opts.interpreter
    xdict['scriptname'] = self.sfile
    if self.opts.make_HTML:
        xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"'
        xdict['outputs'] += ' <data format="html" name="html_file" label="${job_name}.html"/>\n'
    else:
        xdict['command_outputs'] += ' --output_dir "./"'
    if self.opts.output_tab:
        xdict['command_outputs'] += ' --output_tab "$output1"'
        xdict['outputs'] += ' <data format="%s" name="output1" label="${job_name}"/>\n' % self.outFormats
    xdict['command'] = self.newCommand % xdict
    if self.opts.citations:
        with open(self.opts.citations,'r') as citefile:
            citationstext = citefile.read()
        xdict['citations'] = ''.join(
            ["""<citation type="%s">%s</citation>""" % (citation_type, html_escape(citation_content))
             for citation_type, citation_content in parse_citations(citationstext)])
    else:
        xdict['citations'] = ""
    xmls = self.newXML % xdict
    with open(self.xmlfile,'w') as xf:
        xf.write(xmls)
        xf.write('\n')
    # ready for the tarball
613 # ready for the tarball
614
615
def makeTooltar(self):
    """
    Run the script and, if that succeeds, package everything as a tool archive.

    a tool is a gz tarball with eg
    /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...

    The archive is built in a scratch directory named after the tool in the
    current working directory, copied to opts.new_tool, and the scratch
    directory is removed afterwards.

    Returns the value returned by self.run(). If that value is truthy a
    message is written to stderr and the process exits with status 1.
    """
    retval = self.run()
    if retval:
        # write() rather than the python 2 only "print >>" statement
        sys.stderr.write('## Run failed. Cannot build yet. Please fix and retry\n')
        sys.exit(1)
    tdir = self.toolname
    os.mkdir(tdir)
    self.makeXML()
    if self.opts.help_text:
        with open(self.opts.help_text,'r') as helpfile:
            hlp = helpfile.read()
    else:
        hlp = 'Please ask the tool author for help as none was supplied at tool generation\n'
    readme_dict = {'readme':hlp,'interpreter':self.opts.interpreter,'interpreter_version':self.interpreter_version,'interpreter_name':self.interpreter_name,
        'interpreter_owner':self.interpreter_owner,'interpreter_pack':self.interpreter_pack}
    if self.opts.include_dependencies == "yes":
        if self.opts.envshpath == 'system':
            tooldepcontent = self.toolhtmldepskel % readme_dict
        else:
            tooldepcontent = self.toolhtmldepinterpskel % readme_dict
    else:
        tooldepcontent = self.emptytoolhtmldepskel % readme_dict
    with open(os.path.join(tdir,'tool_dependencies.xml'),'w') as depf:
        depf.write(tooldepcontent)
        depf.write('\n')
    testdir = os.path.join(tdir,'test-data')
    os.mkdir(testdir) # make tests directory
    # input_tab may be unset for tools that take no input
    for si in (self.opts.input_tab or []):
        if si.find(',') != -1:
            si = si.split(',')[0] # entries are path,name - only the path is copied
        dest = os.path.join(testdir,os.path.basename(si))
        if si != dest:
            shutil.copyfile(si,dest)
    if self.opts.output_tab:
        shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output))
    if self.opts.make_HTML:
        shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML))
    if self.opts.output_dir:
        shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log'))
    outpif = '%s.py' % self.toolname # new name
    outpiname = os.path.join(tdir,outpif) # path for the tool tarball
    pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM)
    notes = ['# %s - a self annotated version of %s generated by running %s\n' % (outpiname,pyin,pyin),]
    notes.append('# to make a new Galaxy tool called %s\n' % self.toolname)
    notes.append('# User %s at %s\n' % (self.opts.user_email,timenow()))
    with open(self.pyfile,'r') as pi: # our code becomes new tool wrapper (!) - first Galaxy worm
        notes += pi.readlines()
    with open(outpiname,'w') as outpi:
        outpi.write(''.join(notes))
        outpi.write('\n')
    # NOTE(review): self.sfile is produced by tempfile.mkstemp in __init__; if
    # it is an absolute path os.path.join discards tdir, stname equals
    # self.sfile and this copy is skipped - confirm that is intended
    stname = os.path.join(tdir,self.sfile)
    if not os.path.exists(stname):
        shutil.copyfile(self.sfile, stname)
    xtname = os.path.join(tdir,self.xmlfile)
    if not os.path.exists(xtname):
        shutil.copyfile(self.xmlfile,xtname)
    tarpath = "%s.tar.gz" % self.toolname
    tar = tarfile.open(tarpath, "w:gz")
    try:
        tar.add(tdir,arcname='%s' % self.toolname)
    finally:
        tar.close()
    shutil.copyfile(tarpath,self.opts.new_tool)
    shutil.rmtree(tdir)
    ## TODO: replace with optional direct upload to local toolshed?
    return retval
687
688
689 def compressPDF(self,inpdf=None,thumbformat='png'):
690 """need absolute path to pdf
691 note that GS gets confoozled if no $TMP or $TEMP
692 so we set it
693 """
694 assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName)
695 hlog = os.path.join(self.opts.output_dir,"compress_%s.txt" % os.path.basename(inpdf))
696 sto = open(hlog,'a')
697 our_env = os.environ.copy()
698 our_tmp = our_env.get('TMP',None)
699 if not our_tmp:
700 our_tmp = our_env.get('TEMP',None)
701 if not (our_tmp and os.path.exists(our_tmp)):
702 newtmp = os.path.join(self.opts.output_dir,'tmp')
703 try:
704 os.mkdir(newtmp)
705 except:
706 sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
707 our_env['TEMP'] = newtmp
708 if not self.temp_warned:
709 sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
710 self.temp_warned = True
711 outpdf = '%s_compressed' % inpdf
712 cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH","-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf,inpdf]
713 x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
714 retval1 = x.wait()
715 sto.close()
716 if retval1 == 0:
717 os.unlink(inpdf)
718 shutil.move(outpdf,inpdf)
719 os.unlink(hlog)
720 hlog = os.path.join(self.opts.output_dir,"thumbnail_%s.txt" % os.path.basename(inpdf))
721 sto = open(hlog,'w')
722 outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
723 if self.useGM:
724 cl2 = ['gm', 'convert', inpdf, outpng]
725 else: # assume imagemagick
726 cl2 = ['convert', inpdf, outpng]
727 x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
728 retval2 = x.wait()
729 sto.close()
730 if retval2 == 0:
731 os.unlink(hlog)
732 retval = retval1 or retval2
733 return retval
734
735
def getfSize(self,fpath,outpath):
    """
    Return a short human readable size string for the file outpath/fpath.

    The result is '' when the path is not an existing regular file, '0 B' for
    an empty file, otherwise the size in B, KB (above 2**10 bytes) or MB
    (above 2**20 bytes); KB and MB are shown with one decimal place.
    """
    target = os.path.join(outpath,fpath)
    if not os.path.isfile(target):
        return ''
    nbytes = float(os.path.getsize(target))
    mega = 2**20
    kilo = 2**10
    if nbytes > mega:
        return '%1.1f MB' % (nbytes/mega)
    if nbytes > kilo:
        return '%1.1f KB' % (nbytes/kilo)
    if nbytes > 0:
        return '%d B' % (int(nbytes))
    return '0 B'
752
def makeHtml(self):
    """ Create an HTML file content to list all the artifacts found in the output_dir

    Every file in self.opts.output_dir except 'Rplots.pdf' gets a row in a
    download table.  Each PDF is passed to self.compressPDF and shown as a
    thumbnail; when that call returns non-zero the PDF itself is used as the
    image source.  Thumbnails are grouped into one section per '.log' file: a
    PDF belongs to the section whose log name shares its prefix up to the
    first underscore, and whatever is left over is shown with the tool log
    self.tlog, which is always the last section.  The non-blank lines of each
    log follow its thumbnails.

    The page is written to self.opts.output_html and the list of HTML
    fragments is kept in self.html.  Nothing is returned.
    """

    galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="toolFormBody">
"""
    galhtmlpostfix = """</div></body></html>\n"""

    outdir = self.opts.output_dir
    tlogpath = os.path.abspath(self.tlog)
    flist = sorted(x for x in os.listdir(outdir) if x != 'Rplots.pdf')
    html = []
    html.append(galhtmlprefix % progname)
    html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.toolname,timenow()))
    fhtml = []
    if flist:
        # log file names determine sections.  listdir() names are relative to
        # output_dir, so resolve them against it (not the current directory)
        # both for spotting the tool log and for opening the files later.
        sections = []  # (name shown in messages, path to open)
        for x in flist:
            if x.lower().endswith('.log'):
                xpath = os.path.join(outdir,x)
                if os.path.abspath(xpath) != tlogpath:
                    sections.append((x,xpath))
        sections.append((tlogpath,tlogpath)) # make it the last one
        pdflist = []
        for rownum,fname in enumerate(flist):
            dname,e = os.path.splitext(fname)
            sfsize = self.getfSize(fname,outdir)
            if e.lower() == '.pdf': # compress and make a thumbnail
                thumb = '%s.%s' % (dname,self.thumbformat)
                pdff = os.path.join(outdir,fname)
                retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
                if retval == 0:
                    pdflist.append((fname,thumb))
                else: # no thumbnail - fall back to the pdf itself
                    pdflist.append((fname,fname))
            if (rownum+1) % 2 == 0:
                fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
            else:
                fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
        for logfname,logpath in sections: # expect at least tlog - if more
            if logpath == tlogpath: # tool log takes every pdf not claimed by an earlier section
                sectionname = 'All tool run'
                if len(sections) > 1:
                    sectionname = 'Other'
                ourpdfs = pdflist
            else:
                realname = os.path.basename(logfname)
                sectionname = os.path.splitext(realname)[0].split('_')[0] # break in case _ added to log
                ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
                pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] != sectionname] # remove
            npdf = len(ourpdfs)
            if npdf > 0:
                # smallest square grid that holds all the thumbnails
                nacross = int(math.ceil(math.sqrt(npdf)))
                width = min(400,1200 // nacross)
                html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname)
                html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>')
                ntogo = nacross # counter for table row padding with empty cells
                html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
                for i,paths in enumerate(ourpdfs):
                    fname,thumb = paths
                    s = ('<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"\n'
                         'alt="Image called %s"/></a></td>\n') % (fname,thumb,fname,width,fname)
                    if (i+1) % nacross == 0: # row is full
                        s += '</tr>\n'
                        ntogo = 0
                        if i < (npdf - 1): # more to come
                            s += '<tr>'
                            ntogo = nacross
                    else:
                        ntogo -= 1
                    html.append(s)
                if html[-1].strip().endswith('</tr>'):
                    html.append('</table></div>\n')
                else:
                    if ntogo > 0: # pad
                        html.append('<td>&nbsp;</td>'*ntogo)
                    html.append('</tr></table></div>\n')
            with open(logpath,'r') as logf:
                logtext = [x for x in logf if x.strip()]
            html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
            if len(logtext) > 1:
                html.append('\n<pre>\n')
                html += logtext
                html.append('\n</pre>\n')
            else:
                html.append('%s is empty<br/>' % logfname)
    if fhtml:
        fhtml.insert(0,'<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
        fhtml.append('</table></div><br/>')
        html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
        html += fhtml # add all non-pdf files to the end of the display
    else:
        html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
    html.append(galhtmlpostfix)
    with open(self.opts.output_html,'w') as htmlf:
        htmlf.write('\n'.join(html))
        htmlf.write('\n')
    self.html = html
864
865
866
def run(self):
    """
    Run the command line in self.cl and return its exit status.

    When self.opts.envshpath is not 'system' that path is first handed to
    shell_source.  With an output directory set, the command runs with that
    directory as its working directory, stdout goes to self.tlog (after a
    header line recording the command) and stderr goes to self.elog; if the
    exit status is non-zero, whatever was captured in self.elog is echoed to
    this process's stderr.  Without an output directory the command simply
    inherits this process's streams.  self.makeHtml() is called afterwards
    when self.opts.make_HTML is set.
    """
    if self.opts.envshpath != 'system':
        shell_source(self.opts.envshpath)
        # this only happens at tool generation - the generated tool relies on the dependencies all being set up
        # at toolshed installation by sourcing local env.sh
    err = ''
    if self.opts.output_dir:
        with open(self.elog,'wb') as ste:
            with open(self.tlog,'w') as sto:
                s = ' '.join(self.cl)
                sto.write('## Executing Toolfactory generated command line = %s\n' % s)
                sto.flush() # header must reach the file before the child starts writing to it
                p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=ste,cwd=self.opts.output_dir)
                retval = p.wait()
        buffsize = 1048576
        chunks = []
        with open(self.elog,'rb') as tmp_stderr:
            try:
                while True:
                    chunk = tmp_stderr.read(buffsize)
                    if not chunk:
                        # stop on end of file; testing the accumulated length instead
                        # never terminates when the log is an exact multiple of buffsize
                        break
                    chunks.append(chunk)
            except OverflowError:
                pass
        err = b''.join(chunks)
        if not isinstance(err,str): # bytes on python 3 - decode so it can be written as text
            err = err.decode('utf-8','replace')
    else:
        p = subprocess.Popen(self.cl,shell=False)
        retval = p.wait()
    if self.opts.output_dir:
        if retval != 0 and err: # problem
            sys.stderr.write('%s\n' % err)
    if self.opts.make_HTML:
        self.makeHtml()
    return retval
906
907
908
def main():
    """
    Command line entry point: parse the options, validate them and drive a ScriptRunner.

    Raises AssertionError when --bad_user is supplied, when --tool_name or
    --interpreter is missing, or when --script_path is missing or not an
    existing file.  Otherwise a ScriptRunner is built from the options and
    either makeTooltar() (when --make_Tool is given) or run() is called; the
    runner's script file (r.sfile) is removed afterwards and a non-zero
    result is passed to sys.exit().
    """
    u = """
This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
<command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
</command>
"""
    op = optparse.OptionParser(usage=u)
    a = op.add_option
    a('--script_path',default=None)
    a('--tool_name',default=None)
    a('--interpreter',default=None)
    a('--output_dir',default='./')
    a('--output_html',default=None)
    a('--input_tab',default=[], action="append") # these are "galaxypath,metadataname" pairs
    a("--input_formats",default="tabular")
    a('--output_tab',default=None)
    a('--output_format',default='tabular')
    a('--user_email',default='Unknown')
    a('--bad_user',default=None)
    a('--make_Tool',default=None)
    a('--make_HTML',default=None)
    a('--help_text',default=None)
    a('--tool_desc',default=None)
    a('--new_tool',default=None)
    a('--tool_version',default=None)
    a('--include_dependencies',default=None)
    a('--citations',default=None)
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    a('--edit_additional_parameters', action="store_true", default=False)
    a('--envshpath',default="system")
    opts, args = op.parse_args()
    # raise explicitly rather than use assert statements, which are stripped under python -O
    if opts.bad_user:
        raise AssertionError('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user,opts.bad_user))
    if not opts.tool_name:
        raise AssertionError('## Tool Factory expects a tool name - eg --tool_name=DESeq')
    if not opts.interpreter:
        raise AssertionError('## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
    # test for a missing path first: os.path.isfile(None) raises TypeError instead of this message
    if not (opts.script_path and os.path.isfile(opts.script_path)):
        raise AssertionError('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # best effort - usually the directory is already there
            pass
    opts.input_tab = [x.replace('"','').replace("'",'') for x in opts.input_tab]
    # remove quotes we need to deal with spaces in CL params
    opts.additional_parameters = [x.replace('"','') for x in opts.additional_parameters]
    r = ScriptRunner(opts)
    try:
        if opts.make_Tool:
            retcode = r.makeTooltar()
        else:
            retcode = r.run()
    finally:
        os.unlink(r.sfile) # remove the script file even if the run blew up
    if retcode:
        sys.exit(retcode) # indicate failure to job runner
960
961
# Run the wrapper only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
964
965
966