# rgToolFactoryMultIn.py
# see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
#
# copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
#
# all rights reserved
# Licensed under the LGPL
# suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
#
# January 2015
# in the process of building a complex tool
# added ability to choose one of the current toolshed package_r or package_perl or package_python dependencies and source that package
# need to add that package to tool_dependencies
#
# sept 2014 added additional params from
# https://bitbucket.org/mvdbeek/dockertoolfactory/src/d4863bcf7b521532c7e8c61b6333840ba5393f73/DockerToolFactory.py?at=default
# passing them is complex
# and they are restricted to NOT contain commas or double quotes to ensure that they can be safely passed together on
# the toolfactory command line as a comma delimited double quoted string for parsing and passing to the script
# see examples on this tool form
# august 2014
# Allows arbitrary number of input files
# NOTE positional parameters are now passed to script
# and output (may be "None") is *before* arbitrary number of inputs
#
# march 2014
# had to remove dependencies because cross toolshed dependencies are not possible - can't pre-specify a toolshed url for graphicsmagick and ghostscript
# grrrrr - night before a demo
# added dependencies to a tool_dependencies.xml if html page generated so generated tool is properly portable
#
# added ghostscript and graphicsmagick as dependencies
# fixed a weird problem where gs was trying to use the new_files_path from universe (database/tmp) as ./database/tmp
# errors ensued
#
# august 2013
# found a problem with GS if $TMP or $TEMP missing - now inject /tmp and warn
#
# july 2013
# added ability to combine images and individual log files into html output
# just make sure there's a log file foo.log and it will be output
# together with all images named like "foo_*.pdf"
# otherwise old format for html
#
# January 2013
# problem pointed out by Carlos Borroto
# added escaping for <>$ - thought I did that ages ago...
#
# August 11 2012
# changed to use shell=False and cl as a sequence
# This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
# It also serves as the wrapper for the new tool.
#
# you paste and run your script
# Only works for simple scripts that read one input from the history.
# Optionally can write one new history dataset,
# and optionally collect any number of outputs into links on an autogenerated HTML page.
# DO NOT install on a public or important site - please.
# installed generated tools are fine if the script is safe.
# They just run normally and their user cannot do anything unusually insecure
# but please, practice safe toolshed.
# Read the fucking code before you install any tool
# especially this one
# After you get the script working on some test data, you can
# optionally generate a toolshed compatible gzip file
# containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
# safe and largely automated installation in a production Galaxy.
# If you opt for an HTML output, you get all the script outputs arranged
# as a single Html history item - all output files are linked, thumbnails for all the pdfs.
# Ugly but really inexpensive.
#
# Patches appreciated please.
#
#
# long route to June 2012 product
# Behold the awesome power of Galaxy and the toolshed with the tool factory to bind them
# derived from an integrated script model
# called rgBaseScriptWrapper.py
# Note to the unwary:
# This tool allows arbitrary scripting on your Galaxy as the Galaxy user
# There is nothing stopping a malicious user doing whatever they choose
# Extremely dangerous!!
# Totally insecure. So, trusted users only
#
# preferred model is a developer using their throw away workstation instance - ie a private site.
# no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
#
import sys
import shutil
import subprocess
import os
import time
import tempfile
import optparse
import tarfile
import re
import shutil
import math
# Module-level settings used throughout the factory.
toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'  # project home page
myversion = 'V001.1 March 2014'
progname = os.path.basename(sys.argv[0])  # name of the running script without its directory
debug = False
verbose = False
# When an HTML report is produced, the dependencies it needs must be specified in a
# tool_dependencies.xml file and referred to in the generated tool xml.
def timenow():
    """Return the current local time as a 'DD/MM/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    now = time.localtime(time.time())
    return time.strftime(stamp_format, now)
# Replacement text for characters that must not appear literally in text embedded in a
# generated tool XML: the XML metacharacters become entities and "$" is backslash-escaped
# so that template parsing leaves it alone.
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",
}


def html_escape(text):
    """Produce entities within text.

    Each of "&", "<" and ">" is replaced by its XML entity and "$" by a
    backslash followed by "$"; every other character is copied unchanged.

    text -- the string to escape
    Returns the escaped string.
    """
    return "".join(html_escape_table.get(c, c) for c in text)
def html_unescape(text):
    """Revert entities within text.

    Turns "&gt;", "&lt;" and "&amp;" back into ">", "<" and "&", and a
    backslash-escaped "$" back into a plain "$".

    text -- the escaped string
    Returns the unescaped string.
    """
    t = text.replace('&gt;', '>').replace('&lt;', '<').replace('\\$', '$')
    # "&amp;" is decoded last: decoding it first would turn "&amp;lt;" into "&lt;" and
    # then wrongly into "<" on the following replacement.
    return t.replace('&amp;', '&')
def cmd_exists(cmd):
    """Return True when the shell builtin `type` can resolve cmd, False otherwise."""
    probe = "type " + cmd
    status = subprocess.call(probe, shell=True,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return status == 0
def parse_citations(citations_text):
    """Split a citations blob into a list of (kind, content) tuples.

    Entries are separated by the marker "**ENTRY**"; entries that are empty or
    only whitespace are dropped. An entry starting with "doi" yields
    ("doi", remainder); any other entry is treated as bibtex and has its first
    len("bibtex") characters removed. The content is stripped of surrounding
    whitespace.
    """
    marker = "**ENTRY**"
    parsed = []
    for entry in citations_text.split(marker):
        if not entry.strip():
            continue
        kind = "bibtex"
        if entry.startswith("doi"):
            kind = "doi"
        parsed.append((kind, entry[len(kind):].strip()))
    return parsed
def shell_source(script):
    """Emulate the shell's "source" for a Galaxy tool dependency env.sh.

    Runs `env -i ; . <script> ; env -0` through the shell, reads the
    NUL-separated environment listing it prints and merges the result into
    os.environ. Only entries that contain exactly one "=" are kept.
    See http://pythonwise.blogspot.fr/2010/04/sourcing-shell-script.html
    NUL separators are used instead of newlines to finesse the automagic exports.

    script -- path of the shell script to source
    """
    command = "env -i ; . %s ; env -0" % script
    child = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True)
    raw = child.communicate()[0]
    sourced = {}
    for entry in raw.split('\0'):
        fields = entry.split("=")
        if len(fields) == 2:  # skip anything that is not a simple NAME=value pair
            sourced[fields[0]] = fields[1]
    os.environ.update(sourced)
class ScriptRunner:
"""Wrapper that runs an arbitrary user supplied script and can package it as a Galaxy tool.

The tool XML and command line text are assembled from %-style string templates
held on the instance. The original author notes that this templating is a kludge
that "developed quite naturally and seems to work ok with little overhead" and
should eventually be done properly.
"""
def __init__(self,opts=None,treatbashSpecial=True):
    """
    Clean up the inputs and set up everything needed to run or package the script.

    opts -- parsed command line options (see main); the script at opts.script_path
            is read, normalised and written to a temporary file (self.sfile)
    treatbashSpecial -- when true, run() hands sh/bash scripts to runBash()

    Side effects: changes directory to opts.output_dir when it is set, creates a
    temporary copy of the script, writes a <toolname>.<interpreter> artifact into
    opts.output_dir, and builds self.cl, the command line used to run the script.
    """
    # NOTE(review): the XML skeleton literals below appear to have lost their markup in
    # this copy of the file (only the %-placeholders remain) - restore them from the
    # project repository before relying on the generated XML.
    self.toolhtmldepinterpskel = """
%(readme)s
"""
    self.toolhtmldepskel = """
%(readme)s
"""
    self.emptytoolhtmldepskel = """
%(readme)s
"""
    self.protorequirements = """ghostscriptgraphicsmagick"""
    self.protorequirements_interpreter = """ghostscriptgraphicsmagick%(interpreter_name)s"""
    self.newCommand="""
%(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s"
--tool_name "%(toolname)s"
%(command_inputs)s
%(command_outputs)s
"""
    self.tooltestsTabOnly = """
%(test1Inputs)s
%(additionalParams)s
"""
    self.tooltestsHTMLOnly = """
%(test1Inputs)s
%(additionalParams)s
"""
    self.tooltestsBoth = """
%(test1Inputs)s
%(additionalParams)s
"""
    self.newXML="""
%(tooldesc)s
%(requirements)s
%(command)s
%(inputs)s
%(additionalInputs)s
%(outputs)s
%(script)s
%(tooltests)s
%(help)s
%(citations)s
10.1093/bioinformatics/bts573"""
    self.useGM = cmd_exists('gm')
    self.useIM = cmd_exists('convert')
    self.useGS = cmd_exists('gs')
    self.temp_warned = False # we want only one warning if $TMP not set
    self.treatbashSpecial = treatbashSpecial
    if opts.output_dir: # simplify for the tool tarball
        os.chdir(opts.output_dir)
    self.thumbformat = 'png'
    self.opts = opts
    self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but..
    self.toolid = self.toolname
    self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
    self.pyfile = self.myname # crude but efficient - the cruft won't hurt much
    self.xmlfile = '%s.xml' % self.toolname
    with open(self.opts.script_path,'r') as script_in:
        rx = [x.rstrip() for x in script_in.readlines()] # remove pesky dos line endings if needed
    self.script = '\n'.join(rx)
    fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname,suffix=".%s" % (opts.interpreter))
    # mkstemp returns an open OS-level descriptor; writing through it (instead of
    # reopening the path) ensures the descriptor is closed rather than leaked
    with os.fdopen(fhandle,'w') as tscript: # self.sfile is the script source for Popen
        tscript.write(self.script)
    self.indentedScript = " %s" % '\n'.join([' %s' % html_escape(x) for x in rx]) # for restructured text in help
    self.escapedScript = "%s" % '\n'.join([' %s' % html_escape(x) for x in rx])
    self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.toolname)
    if opts.output_dir: # may not want these complexities
        self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.toolname)
        art = '%s.%s' % (self.toolname,opts.interpreter)
        artpath = os.path.join(self.opts.output_dir,art) # need full path
        with open(artpath,'w') as artifact: # keep a copy of the script with the outputs
            artifact.write(self.script)
    self.cl = []
    self.html = []
    a = self.cl.append
    a(opts.interpreter)
    a(self.sfile)
    # if multiple inputs - positional or need to distinguish them with cl params
    if opts.input_tab:
        # one test input name per supplied "path,name" entry (or bare path)
        tests = [os.path.basename(intab.split(',')[0]) for intab in opts.input_tab]
        # NOTE(review): this template is empty in this copy of the file (its markup
        # appears to have been lost), so the % operator raises TypeError - restore it
        self.test1Inputs = '' % (','.join(tests))
    else:
        self.test1Inputs = ''
    # we always pass path,name pairs in using python optparse append
    # but the command line has to be different
    self.infile_paths = ''
    self.infile_names = ''
    if self.opts.input_tab:
        paths = []
        names = []
        for entry in self.opts.input_tab:
            fields = entry.split(',')
            paths.append(fields[0].strip())
            # an entry without a name falls back to its path, matching the test names above
            if len(fields) > 1:
                names.append(fields[1].strip())
            else:
                names.append(fields[0].strip())
        self.infile_paths = ','.join(paths)
        self.infile_names = ','.join(names)

    def name_value_pairs(strip_quotes=True):
        # unescaped (name, value) pairs from the "name,value,..." additional parameters
        pairs = []
        for p in opts.additional_parameters:
            if strip_quotes:
                p = p.replace('"','')
            psplit = p.split(',')
            pairs.append((html_unescape(psplit[0]),html_unescape(psplit[1])))
        return pairs

    if self.opts.interpreter == 'python':
        # yes, this is how additional parameters are always passed in python - to the TF itself and to
        # scripts to avoid having unknown parameter names (yes, they can be parsed but...) on the command line
        if self.opts.input_tab:
            a('--INPATHS "%s"' % (self.infile_paths))
            a('--INNAMES "%s"' % (self.infile_names))
        if self.opts.output_tab:
            a('--OUTPATH "%s"' % self.opts.output_tab)
        for param,value in name_value_pairs():
            a('%s="%s"' % (param,value))
    elif self.opts.interpreter == 'Rscript':
        # pass params on command line
        if self.opts.input_tab:
            a('INPATHS="%s"' % self.infile_paths)
            a('INNAMES="%s"' % self.infile_names)
        if self.opts.output_tab:
            a('OUTPATH="%s"' % self.opts.output_tab)
        for param,value in name_value_pairs():
            a('%s="%s"' % (param,value))
    elif self.opts.interpreter == 'perl':
        # pass params on command line - positional paths, then name=value pairs
        if self.opts.input_tab:
            a('%s' % self.infile_paths)
            a('%s' % self.infile_names)
        if self.opts.output_tab:
            a('%s' % self.opts.output_tab)
        for param,value in name_value_pairs():
            if ' ' in value:
                a('%s="%s"' % (param,value))
            else:
                a('%s=%s' % (param,value))
    elif self.opts.interpreter == 'sh' or self.opts.interpreter == 'bash':
        # more is better - now move all params into environment AND drop on to command line.
        # The assignments are collected first and placed directly after "env", so they are
        # always in front of the interpreter even when there are no inputs or no output
        # (fixed insert positions used to push them behind the interpreter in that case).
        env_settings = []
        if self.opts.input_tab:
            env_settings.append('INPATHS=%s' % (self.infile_paths))
            env_settings.append('INNAMES=%s' % (self.infile_names))
        if self.opts.output_tab:
            outpath = 'OUTPATH=%s' % (self.opts.output_tab)
            env_settings.append(outpath)
            a(outpath)
        # sets those environment variables for the script
        # additional params appear in CL - yes, it's confusing
        for param,value in name_value_pairs(strip_quotes=False):
            if ' ' in value:
                setting = '%s="%s"' % (param,value)
            else:
                setting = '%s=%s' % (param,value)
            env_settings.append(setting)
            a(setting)
        # slice assignment keeps the same list object that "a" is bound to
        self.cl[0:0] = ['env'] + env_settings
    # never populated, kept only in case other code reads these names
    self.interp_owner = None
    self.interp_pack = None
    self.interp_revision = None
    self.interp_version = None
    # these are the names the XML and tarball builders read, so they must always exist
    self.interpreter_owner = None
    self.interpreter_pack = None
    self.interpreter_revision = None
    self.interpreter_version = None
    if opts.envshpath != 'system': # need to parse out details for our tool_dependency
        try:
            packdetails = opts.envshpath.split(os.path.sep)[-4:-1] # eg ['fubar', 'package_r_3_1_1', '63cdb9b2234c']
            self.interpreter_owner = packdetails[0]
            self.interpreter_pack = packdetails[1]
            self.interpreter_revision = packdetails[2]
            self.interpreter_version = '.'.join(self.interpreter_pack.split('_')[2:])
            # hope our naming convention as at jan 2015 = package_[interp]_v0_v1_v2... = version v0.v1.v2.. is in play
        except (AttributeError, IndexError):
            # path too short or not a string - leave the remaining details as None
            pass
    self.outFormats = opts.output_format
    self.inputFormats = opts.input_formats
    self.test1Output = '%s_test1_output.xls' % self.toolname
    self.test1HTML = '%s_test1_output.html' % self.toolname
def makeXML(self):
    """
    Create a Galaxy xml tool wrapper for the new script and write it to self.xmlfile.

    A dictionary (xdict) is filled with one entry per placeholder of the
    self.newXML / self.newCommand templates - description, requirements,
    command line, inputs, outputs, the escaped script, tests, help and
    citations - and the rendered template is written to disk.

    fixme - use templating or something less fugly than this.
    """
    # NOTE(review): several template literals in this method are empty or whitespace only
    # ('' and ' \n') in this copy of the file - their XML markup appears to have been
    # lost. Applying % to a template without placeholders raises TypeError, so restore
    # the literals from the project repository before use.
    opts = self.opts
    # these templates need a dict with the right keys to match the parameters - outputs, help, code...
    xdict = {}
    xdict['additionalParams'] = ''
    xdict['additionalInputs'] = ''
    if opts.additional_parameters:
        fields = [x.split(',') for x in opts.additional_parameters] # name,value,... per parameter
        if opts.edit_additional_parameters: # add to new tool form with default value set to original value
            xdict['additionalInputs'] = '\n'.join(['' % \
                (f[0],html_escape(f[1]),html_escape(f[2]),html_escape(f[3]),f[4]) for f in fields])
        xdict['additionalParams'] = '\n'.join(['' % (f[0],html_escape(f[1])) for f in fields])
    # getattr: the interpreter details are absent when the system interpreter is used
    xdict['interpreter_owner'] = getattr(self,'interpreter_owner',None)
    xdict['interpreter_version'] = getattr(self,'interpreter_version',None)
    xdict['interpreter_name'] = getattr(self,'interpreter_pack',None)
    xdict['requirements'] = ''
    if opts.include_dependencies == "yes":
        if opts.envshpath != 'system':
            xdict['requirements'] = self.protorequirements_interpreter % xdict
        else:
            xdict['requirements'] = self.protorequirements
    xdict['tool_version'] = opts.tool_version
    xdict['test1HTML'] = self.test1HTML
    xdict['test1Output'] = self.test1Output
    xdict['test1Inputs'] = self.test1Inputs
    if opts.make_HTML and opts.output_tab:
        xdict['tooltests'] = self.tooltestsBoth % xdict
    elif opts.make_HTML:
        xdict['tooltests'] = self.tooltestsHTMLOnly % xdict
    else:
        xdict['tooltests'] = self.tooltestsTabOnly % xdict
    xdict['script'] = self.escapedScript
    # configfile is least painful way to embed script to avoid external dependencies
    # but requires escaping of <, > and $ to avoid Mako parsing
    if opts.help_text:
        with open(opts.help_text,'r') as helpfile:
            # must html escape here too - thanks to Marius van den Beek
            xdict['help'] = ''.join([html_escape(x) for x in helpfile.readlines()])
    else:
        xdict['help'] = 'Please ask the tool author (%s) for help as none was supplied at tool generation\n' % (opts.user_email)
    coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::']
    coda.append('\n')
    coda.append(self.indentedScript)
    coda.append('\n**Attribution**\nThis Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.\n' % (opts.user_email,timenow()))
    coda.append('See %s for details of that project' % (toolFactoryURL))
    coda.append('Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team. ')
    coda.append('Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573\n')
    xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda))
    if opts.tool_desc:
        xdict['tooldesc'] = '%s' % opts.tool_desc
    else:
        xdict['tooldesc'] = ''
    xdict['command_outputs'] = ''
    xdict['outputs'] = ''
    if opts.input_tab:
        cins = ['\n',]
        cins.append('--input_formats %s' % opts.input_formats)
        cins.append('#for intab in $input1:')
        cins.append('--input_tab "${intab},${intab.name}"')
        cins.append('#end for\n')
        xdict['command_inputs'] = '\n'.join(cins)
        xdict['inputs'] = ''' \n''' % (self.inputFormats,self.inputFormats)
    else:
        xdict['command_inputs'] = '' # assume no input - eg a random data generator
        xdict['inputs'] = ''
    if (len(opts.additional_parameters) > 0):
        cins = ['\n',]
        for params in opts.additional_parameters:
            psplit = params.split(',') # name,value...
            psplit[3] = html_escape(psplit[3])
            if opts.edit_additional_parameters:
                psplit[1] = '$%s' % psplit[0] # replace with form value
            else:
                psplit[1] = html_escape(psplit[1]) # leave prespecified value
            cins.append('--additional_parameters """%s"""' % ','.join(psplit))
        xdict['command_inputs'] = '%s\n%s' % (xdict['command_inputs'],'\n'.join(cins))
    xdict['inputs'] += ' \n' % self.toolname
    xdict['toolname'] = self.toolname
    xdict['toolid'] = self.toolid
    xdict['interpreter'] = opts.interpreter
    xdict['scriptname'] = self.sfile
    if opts.make_HTML:
        xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"'
        xdict['outputs'] += ' \n'
    else:
        xdict['command_outputs'] += ' --output_dir "./"'
    if opts.output_tab:
        xdict['command_outputs'] += ' --output_tab "$output1"'
        xdict['outputs'] += ' \n' % self.outFormats
    xdict['command'] = self.newCommand % xdict
    if opts.citations:
        with open(opts.citations,'r') as citefile:
            citation_tuples = parse_citations(citefile.read())
        xdict['citations'] = ''.join(["""%s""" % (citation_type, html_escape(citation_content))
                                      for citation_type, citation_content in citation_tuples])
    else:
        xdict['citations'] = ""
    xmls = self.newXML % xdict
    with open(self.xmlfile,'w') as xf:
        xf.write(xmls)
        xf.write('\n')
    # ready for the tarball
def makeTooltar(self):
    """
    Run the script and, if it succeeds, package it as a toolshed style tarball.

    a tool is a gz tarball with eg
    /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...

    The tarball is built in a directory named after the tool, copied to
    self.opts.new_tool and the build directory is removed afterwards.
    Exits the process with status 1 when the script run fails.
    Returns the (zero) return code of the script run.
    """
    retval = self.run()
    if retval:
        sys.stderr.write('## Run failed. Cannot build yet. Please fix and retry\n')
        sys.exit(1)
    tdir = self.toolname
    os.mkdir(tdir)
    self.makeXML()
    if self.opts.help_text:
        with open(self.opts.help_text,'r') as helpfile:
            hlp = helpfile.read()
    else:
        hlp = 'Please ask the tool author for help as none was supplied at tool generation\n'
    # getattr: the interpreter details are absent when the system interpreter is used
    readme_dict = {'readme':hlp,'interpreter':self.opts.interpreter,
                   'interpreter_version':getattr(self,'interpreter_version',None),
                   'interpreter_name':getattr(self,'interpreter_pack',None),
                   'interpreter_owner':getattr(self,'interpreter_owner',None)}
    if self.opts.include_dependencies == "yes":
        if self.opts.envshpath == 'system':
            tooldepcontent = self.toolhtmldepskel % readme_dict
        else:
            tooldepcontent = self.toolhtmldepinterpskel % readme_dict
    else:
        tooldepcontent = self.emptytoolhtmldepskel % readme_dict
    with open(os.path.join(tdir,'tool_dependencies.xml'),'w') as depf:
        depf.write(tooldepcontent)
        depf.write('\n')
    if self.opts.input_tab: # no reproducible test otherwise? TODO: maybe..
        testdir = os.path.join(tdir,'test-data')
        os.mkdir(testdir) # make tests directory
        for entry in self.opts.input_tab:
            si = entry.split(',')[0] # entries are "path,name" pairs or bare paths
            dest = os.path.join(testdir,os.path.basename(si))
            if si != dest:
                shutil.copyfile(si,dest)
        if self.opts.output_tab:
            shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output))
        if self.opts.make_HTML:
            shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML))
        if self.opts.output_dir:
            shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log'))
    outpif = '%s.py' % self.toolname # new name
    outpiname = os.path.join(tdir,outpif) # path for the tool tarball
    pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM)
    notes = ['# %s - a self annotated version of %s generated by running %s\n' % (outpiname,pyin,pyin),]
    notes.append('# to make a new Galaxy tool called %s\n' % self.toolname)
    notes.append('# User %s at %s\n' % (self.opts.user_email,timenow()))
    with open(self.pyfile,'r') as pi: # our code becomes new tool wrapper (!) - first Galaxy worm
        notes += pi.readlines()
    with open(outpiname,'w') as outpi:
        outpi.write(''.join(notes))
        outpi.write('\n')
    # NOTE(review): when self.sfile is an absolute path os.path.join returns it unchanged,
    # so this copy is skipped because the file already exists - confirm that is intended
    stname = os.path.join(tdir,self.sfile)
    if not os.path.exists(stname):
        shutil.copyfile(self.sfile, stname)
    xtname = os.path.join(tdir,self.xmlfile)
    if not os.path.exists(xtname):
        shutil.copyfile(self.xmlfile,xtname)
    tarpath = "%s.tar.gz" % self.toolname
    tar = tarfile.open(tarpath, "w:gz")
    try:
        tar.add(tdir,arcname='%s' % self.toolname)
    finally:
        tar.close()
    shutil.copyfile(tarpath,self.opts.new_tool)
    shutil.rmtree(tdir)
    ## TODO: replace with optional direct upload to local toolshed?
    return retval
def compressPDF(self,inpdf=None,thumbformat='png'):
"""need absolute path to pdf
note that GS gets confoozled if no $TMP or $TEMP
so we set it
"""
assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myName)
hlog = os.path.join(self.opts.output_dir,"compress_%s.txt" % os.path.basename(inpdf))
sto = open(hlog,'a')
our_env = os.environ.copy()
our_tmp = our_env.get('TMP',None)
if not our_tmp:
our_tmp = our_env.get('TEMP',None)
if not (our_tmp and os.path.exists(our_tmp)):
newtmp = os.path.join(self.opts.output_dir,'tmp')
try:
os.mkdir(newtmp)
except:
sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
our_env['TEMP'] = newtmp
if not self.temp_warned:
sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
self.temp_warned = True
outpdf = '%s_compressed' % inpdf
cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH","-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf,inpdf]
x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
retval1 = x.wait()
sto.close()
if retval1 == 0:
os.unlink(inpdf)
shutil.move(outpdf,inpdf)
os.unlink(hlog)
hlog = os.path.join(self.opts.output_dir,"thumbnail_%s.txt" % os.path.basename(inpdf))
sto = open(hlog,'w')
outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
if self.useGM:
cl2 = ['gm', 'convert', inpdf, outpng]
else: # assume imagemagick
cl2 = ['convert', inpdf, outpng]
x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
retval2 = x.wait()
sto.close()
if retval2 == 0:
os.unlink(hlog)
retval = retval1 or retval2
return retval
def getfSize(self,fpath,outpath):
    """
    Return a human readable size string for the file fpath inside outpath.

    Gives '' when the path is not a regular file, '0 B' for an empty file and
    otherwise the size in B, KB or MB.
    """
    target = os.path.join(outpath,fpath)
    if not os.path.isfile(target):
        return ''
    nbytes = float(os.path.getsize(target))
    mega = 2**20
    kilo = 2**10
    if nbytes > mega:
        return '%1.1f MB' % (nbytes/mega)
    if nbytes > kilo:
        return '%1.1f KB' % (nbytes/kilo)
    if nbytes > 0:
        return '%d B' % (int(nbytes))
    return '0 B'
def makeHtml(self):
""" Create the HTML report listing the artifacts found in the output_dir.

Lists self.opts.output_dir (ignoring 'Rplots.pdf'), compresses every pdf and
makes a thumbnail for it via compressPDF, groups the pdfs into sections named
after the '.log' files found, appends the text of each log, and finally lists
all output files with their sizes. The page is written to self.opts.output_html
and the list of fragments is kept in self.html.

NOTE(review): several string literals in this method are split across lines or
empty in this copy of the file - their HTML markup appears to have been lost - and
galhtmlpostfix is used below without a visible definition. Restore them from the
project repository before relying on this method.
"""
galhtmlprefix = """
\n"""
# every file in the output directory except the default R plot file, in name order
flist = os.listdir(self.opts.output_dir)
flist = [x for x in flist if x <> 'Rplots.pdf']
flist.sort()
html = []
html.append(galhtmlprefix % progname)
html.append('
Galaxy Tool "%s" run at %s
' % (self.toolname,timenow()))
# fhtml collects the rows of the "all output files" listing
fhtml = []
if len(flist) > 0:
logfiles = [x for x in flist if x.lower().endswith('.log')] # log file names determine sections
logfiles.sort()
logfiles = [x for x in logfiles if os.path.abspath(x) <> os.path.abspath(self.tlog)]
logfiles.append(os.path.abspath(self.tlog)) # make it the last one
# pdflist holds (pdf name, thumbnail name) pairs
pdflist = []
npdf = len([x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'])
for rownum,fname in enumerate(flist):
dname,e = os.path.splitext(fname)
sfsize = self.getfSize(fname,self.opts.output_dir)
if e.lower() == '.pdf' : # compress and make a thumbnail
thumb = '%s.%s' % (dname,self.thumbformat)
pdff = os.path.join(self.opts.output_dir,fname)
retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
if retval == 0:
pdflist.append((fname,thumb))
else:
pdflist.append((fname,fname))
if (rownum+1) % 2 == 0:
fhtml.append('
' % (fname,fname,sfsize))
for logfname in logfiles: # expect at least tlog - if more
if os.path.abspath(logfname) == os.path.abspath(self.tlog): # handled later
sectionname = 'All tool run'
if (len(logfiles) > 1):
sectionname = 'Other'
ourpdfs = pdflist
else:
realname = os.path.basename(logfname)
sectionname = os.path.splitext(realname)[0].split('_')[0] # break in case _ added to log
ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] <> sectionname] # remove
# nacross is the number of thumbnails per table row: the square root of the number of
# pdfs in this section, rounded up
nacross = 1
npdf = len(ourpdfs)
if npdf > 0:
nacross = math.sqrt(npdf) ## int(round(math.log(npdf,2)))
if int(nacross)**2 != npdf:
nacross += 1
nacross = int(nacross)
width = min(400,int(1200/nacross))
html.append('
%s images and outputs
' % sectionname)
html.append('(Click on a thumbnail image to download the corresponding original PDF image) ')
ntogo = nacross # counter for table row padding with empty cells
html.append('
\n
')
for i,paths in enumerate(ourpdfs):
fname,thumb = paths
s= """
\n""" % (fname,thumb,fname,width,fname)
if ((i+1) % nacross == 0):
s += '
\n'
ntogo = 0
if i < (npdf - 1): # more to come
s += '
'*ntogo)
html.append('\n')
# append the non blank lines of this section's log file
logt = open(logfname,'r').readlines()
logtext = [x for x in logt if x.strip() > '']
html.append('
%s log output
' % sectionname)
if len(logtext) > 1:
html.append('\n
\n')
html += logtext
html.append('\n
\n')
else:
html.append('%s is empty ' % logfname)
if len(fhtml) > 0:
fhtml.insert(0,'
Output File Name (click to view)
Size
\n')
fhtml.append('
')
html.append('
All output files available for downloading
\n')
html += fhtml # add all non-pdf files to the end of the display
else:
html.append('
### Error - %s returned no files - please confirm that parameters are sane
' % self.opts.interpreter)
html.append(galhtmlpostfix)
# write the assembled page and keep the fragments on the instance
htmlf = file(self.opts.output_html,'w')
htmlf.write('\n'.join(html))
htmlf.write('\n')
htmlf.close()
self.html = html
def run(self):
"""
scripts must be small enough not to fill the pipe!
"""
if self.opts.envshpath <> 'system':
shell_source(self.opts.envshpath)
if self.treatbashSpecial and self.opts.interpreter in ['bash','sh']:
retval = self.runBash()
else:
if self.opts.output_dir:
ste = open(self.elog,'w')
sto = open(self.tlog,'w')
sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
sto.flush()
p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=ste,cwd=self.opts.output_dir)
else:
p = subprocess.Popen(self.cl,shell=False)
retval = p.wait()
if self.opts.output_dir:
sto.close()
ste.close()
err = open(self.elog,'r').readlines()
if retval <> 0 and err: # problem
print >> sys.stderr,err
if self.opts.make_HTML:
self.makeHtml()
return retval
def runBash(self):
"""
cannot use - for bash so use self.sfile
"""
if self.opts.output_dir:
s = '## Toolfactory generated command line = %s\n' % ' '.join(self.cl)
sto = open(self.tlog,'w')
sto.write(s)
sto.flush()
p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
else:
p = subprocess.Popen(self.cl,shell=False)
retval = p.wait()
if self.opts.output_dir:
sto.close()
if self.opts.make_HTML:
self.makeHtml()
return retval
def main():
    """Parse the command line, run the script (or build the tool) and exit non zero on failure.

    Raises AssertionError when the caller is flagged as unauthorised or when the
    tool name, interpreter or script path is missing. The temporary copy of the
    script is always removed, whether or not the run succeeds.
    """
    u = """
This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
"""
    op = optparse.OptionParser(usage=u)
    a = op.add_option
    a('--script_path',default=None)
    a('--tool_name',default=None)
    a('--interpreter',default=None)
    a('--output_dir',default='./')
    a('--output_html',default=None)
    a('--input_tab',default=[], action="append") # these are "galaxypath,metadataname" pairs
    a("--input_formats",default="tabular")
    a('--output_tab',default=None)
    a('--output_format',default='tabular')
    a('--user_email',default='Unknown')
    a('--bad_user',default=None)
    a('--make_Tool',default=None)
    a('--make_HTML',default=None)
    a('--help_text',default=None)
    a('--tool_desc',default=None)
    a('--new_tool',default=None)
    a('--tool_version',default=None)
    a('--include_dependencies',default="yes")
    a('--citations',default=None)
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    a('--edit_additional_parameters', action="store_true", default=False)
    a('--envshpath',default="system")
    opts, args = op.parse_args()

    def require(condition, message):
        # explicit raise instead of an assert statement so that these checks (including
        # the authorisation check) still run when python is started with -O
        if not condition:
            raise AssertionError(message)

    require(not opts.bad_user,'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user,opts.bad_user))
    require(opts.tool_name,'## Tool Factory expects a tool name - eg --tool_name=DESeq')
    require(opts.interpreter,'## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
    # test for a missing path first - os.path.isfile(None) raises TypeError
    require(opts.script_path and os.path.isfile(opts.script_path),'## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # best effort - usually the directory already exists
            pass
    opts.input_tab = [x.replace('"','').replace("'",'') for x in opts.input_tab]
    # remove quotes we need to deal with spaces in CL params
    opts.additional_parameters = [x.replace('"','') for x in opts.additional_parameters]
    r = ScriptRunner(opts)
    try:
        if opts.make_Tool:
            retcode = r.makeTooltar()
        else:
            retcode = r.run()
    finally:
        os.unlink(r.sfile) # the temporary script copy is no longer needed
    if retcode:
        sys.exit(retcode) # indicate failure to job runner
# Run the factory only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
main()