Mercurial > repos > mvdbeek > docker_scriptrunner
comparison scriptrunner.py @ 0:21d312776891 draft
planemo upload for repository https://github.com/mvdbeek/docker_scriptrunner/ commit 30f8264cdd67d40dec8acde6407f32152e6a29c1-dirty
| author | mvdbeek | 
|---|---|
| date | Sat, 09 Jul 2016 16:57:13 -0400 | 
| parents | |
| children | 495946ffc2d6 | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:21d312776891 | 
|---|---|
| 1 # DockerToolFactory.py | |
| 2 # see https://github.com/mvdbeek/scriptrunner | |
| 3 | |
| 4 import sys | |
| 5 import shutil | |
| 6 import subprocess | |
| 7 import os | |
| 8 import time | |
| 9 import tempfile | |
| 10 import argparse | |
| 11 import getpass | |
| 12 import tarfile | |
| 13 import re | |
| 14 import shutil | |
| 15 import math | |
| 16 import fileinput | |
| 17 from os.path import abspath | |
| 18 | |
| 19 | |
# File name of the running script (no directory part); the HTML report
# generator interpolates it into the page's "generator" meta tag.
progname = os.path.basename(sys.argv[0])

# Module-level switches, both off by default. Nothing in the visible code
# reads them.
verbose = False
debug = False
| 23 | |
def timenow():
    """Return the current local time as a 'dd/mm/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    # localtime() without an argument uses the current time.
    return time.strftime(stamp_format, time.localtime())
| 28 | |
# Replacement text for characters that must not be copied verbatim into the
# escaped script text. The three markup characters become HTML entities;
# "$" is prefixed with a backslash instead.
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",
}


def html_escape(text):
    """Produce entities within text.

    Each character of ``text`` that has an entry in ``html_escape_table`` is
    replaced by that entry; every other character is copied unchanged.

    :param text: string to escape.
    :returns: the escaped string.
    """
    return "".join(html_escape_table.get(c, c) for c in text)
| 39 | |
def cmd_exists(cmd):
    """Return True when the shell builtin ``type`` can resolve ``cmd``.

    The command's output is discarded via ``os.devnull``. Pipes are not used
    because ``subprocess.call`` never reads them, so a chatty child could
    block once the pipe buffer fills.

    :param cmd: command name; it is interpolated into a shell command line,
        so only pass trusted values.
    :returns: bool, True if ``type <cmd>`` exits with status 0.
    """
    with open(os.devnull, 'w') as devnull:
        status = subprocess.call("type " + cmd, shell=True,
                                 stdout=devnull, stderr=devnull)
    return status == 0
| 43 | |
def construct_bind(host_path, container_path=False, binds=None, ro=True):
    """Build or extend the binds dictionary used to mount paths in docker.

    Every host path becomes a key whose value is
    ``{'bind': <path inside the container>, 'ro': <read-only flag>}``.

    :param host_path: a single path, or a list of paths.
    :param container_path: mount point inside the container; when falsy the
        host path itself is used. For a list of host paths it applies to the
        first element only; the remaining elements are mounted at their own
        host path.
    :param binds: existing mapping to extend; a new dict is created when it
        is None or empty.
    :param ro: read-only flag stored with every entry added by this call.
    :returns: the binds dictionary.
    """
    # TODO remove container_path if it's always going to be the same as host_path
    if not binds:
        binds = {}
    sources = host_path if isinstance(host_path, list) else [host_path]
    target = container_path
    for source in sources:
        binds[source] = {'bind': target or source, 'ro': ro}
        target = False  # an explicit container path is consumed by the first entry
    return binds
| 62 | |
def switch_to_docker(opts):
    """Re-run this script, with the same arguments, inside a docker container.

    The script, the output directory, the inputs/outputs named in ``opts``
    and this program itself are bind-mounted at identical paths. The container
    is started with ``--dockerized 1`` plus the calling user's uid and gid
    appended to the command line. The container log is echoed to stdout and
    the container is always removed afterwards.

    Side effect: ``sys.argv`` is rewritten so the value following
    ``--output_dir`` is an absolute path.

    :param opts: parsed command line namespace (see ``main``).
    :returns: the container exit status as reported by ``docker_client.wait``.
    """
    import docker  # need local import, as container does not have docker-py
    user_id = os.getuid()
    group_id = os.getgid()
    docker_client = docker.Client()
    toolfactory_path = abspath(sys.argv[0])
    output_dir = abspath(opts.output_dir)

    binds = construct_bind(host_path=opts.script_path, ro=False)
    binds = construct_bind(binds=binds, host_path=output_dir, ro=False)
    # 'None' is the placeholder the argument parser uses for "no input".
    if opts.input_tab and opts.input_tab != 'None':
        binds = construct_bind(binds=binds, host_path=opts.input_tab, ro=True)
    if not opts.output_tab == 'None':
        binds = construct_bind(binds=binds, host_path=opts.output_tab, ro=False)
    if opts.make_HTML:
        binds = construct_bind(binds=binds, host_path=opts.output_html, ro=False)
    binds = construct_bind(binds=binds, host_path=toolfactory_path)
    volumes = list(binds)

    # Inject the absolute path of the working dir. The i > 0 guard stops
    # argv[0] from being compared against the last argument (index -1).
    sys.argv = [
        output_dir if i > 0 and sys.argv[i - 1] == '--output_dir' else arg
        for i, arg in enumerate(sys.argv)
    ]
    cmd = ['python', '-u'] + sys.argv + [
        '--dockerized', '1',
        '--user_id', str(user_id),
        '--group_id', str(group_id),
    ]

    # Untagged images may report RepoTags as None.
    image_exists = any(
        opts.docker_image in (image.get('RepoTags') or [])
        for image in docker_client.images()
    )
    if not image_exists:
        docker_client.pull(opts.docker_image)

    container = docker_client.create_container(
        image=opts.docker_image,
        volumes=volumes,
        command=cmd
    )
    container_id = container[u'Id']
    try:
        docker_client.start(container=container_id, binds=binds)
        status = docker_client.wait(container=container_id)
        logs = docker_client.logs(container=container_id)
        if not isinstance(logs, str):
            # Python 3 clients hand back bytes.
            logs = logs.decode('utf-8', 'replace')
        sys.stdout.write(logs + '\n')
    finally:
        # Do not leave the container behind when a step above fails.
        docker_client.remove_container(container_id)
    if isinstance(status, dict):
        # NOTE(review): newer client versions appear to return a dict here - confirm.
        status = status.get('StatusCode', 0)
    return status
| 94 | |
class ScriptRunner:
    """Wrapper for an arbitrary script.

    The script is copied to a temporary file and into the output directory,
    run through the requested interpreter, and the contents of the output
    directory can be summarised as an HTML page.
    """

    def __init__(self, opts=None, treatbashSpecial=True, image_tag='base'):
        """Clean up inputs, set up some outputs and build the command line.

        Side effects: changes the working directory to ``opts.output_dir``,
        writes the script to a temporary file (``self.sfile``) and, when an
        output directory is set, to ``script.<interpreter>`` in that directory.

        :param opts: parsed command line namespace.
        :param treatbashSpecial: when true, bash/sh scripts are passed to the
            interpreter as a file instead of being fed through stdin.
        :param image_tag: stored on the instance; not used by this class.
        """
        self.opts = opts
        self.scriptname = 'script'
        self.useIM = cmd_exists('convert')
        self.useGS = cmd_exists('gs')
        self.temp_warned = False  # we want only one warning if $TMP not set
        self.treatbashSpecial = treatbashSpecial
        self.image_tag = image_tag
        os.chdir(abspath(opts.output_dir))
        self.thumbformat = 'png'
        with open(self.opts.script_path, 'r') as source:
            # rstrip() also removes pesky dos line endings if needed
            s = [x.rstrip() for x in source]
        self.script = '\n'.join(s)
        fhandle, self.sfile = tempfile.mkstemp(prefix='script', suffix=".%s" % (opts.interpreter))
        # Write through the descriptor mkstemp opened so that it gets closed.
        with os.fdopen(fhandle, 'w') as tscript:  # self.sfile is the script source for Popen
            tscript.write(self.script)
        self.indentedScript = '\n'.join([' %s' % html_escape(x) for x in s])  # for restructured text in help
        self.escapedScript = '\n'.join([html_escape(x) for x in s])
        self.elog = os.path.join(self.opts.output_dir, "%s_error.log" % self.scriptname)
        if opts.output_dir:  # may not want these complexities
            self.tlog = os.path.join(self.opts.output_dir, "%s_runner.log" % self.scriptname)
            art = '%s.%s' % (self.scriptname, opts.interpreter)
            artpath = os.path.join(self.opts.output_dir, art)  # need full path
            with open(artpath, 'w') as artifact:  # keep a copy of the script with the outputs
                artifact.write(self.script)
        self.cl = []
        self.html = []
        a = self.cl.append
        a(opts.interpreter)
        if self.treatbashSpecial and opts.interpreter in ['bash', 'sh']:
            a(self.sfile)
        else:
            a('-')  # stdin
        for input_path in opts.input_tab:
            a(input_path)
        if opts.output_tab == 'None':  # If tool generates only HTML, set output name to toolname
            a(str(self.scriptname) + '.out')
        a(opts.output_tab)
        for param in opts.additional_parameters:
            # "name,value": split on the first comma only so values may contain commas
            name, value = param.split(',', 1)
            a('--' + name)
            a(value)
        self.outFormats = opts.output_format
        self.inputFormats = list(opts.input_formats)
        self.test1Input = '%s_test1_input.xls' % self.scriptname
        self.test1Output = '%s_test1_output.xls' % self.scriptname
        self.test1HTML = '%s_test1_output.html' % self.scriptname

    def compressPDF(self, inpdf=None, thumbformat='png'):
        """Compress a PDF in place with ghostscript and make a thumbnail.

        Needs an absolute path to the pdf. Note that GS gets confused if
        neither $TMP nor $TEMP is usable, so a ``tmp`` directory inside the
        output directory is set as $TEMP in that case.

        :param inpdf: absolute path of the PDF to compress.
        :param thumbformat: file extension of the thumbnail ``convert`` writes.
        :returns: 0 when both ``gs`` and ``convert`` succeeded, otherwise the
            first non-zero exit status.
        :raises AssertionError: if ``inpdf`` is not an existing file.
        """
        assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf, self.scriptname)
        hlog = os.path.join(self.opts.output_dir, "compress_%s.txt" % os.path.basename(inpdf))
        our_env = os.environ.copy()
        our_tmp = our_env.get('TMP', None)
        if not our_tmp:
            our_tmp = our_env.get('TEMP', None)
        outpdf = '%s_compressed' % inpdf
        cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH",
              "-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf, inpdf]
        with open(hlog, 'a') as sto:
            if not (our_tmp and os.path.exists(our_tmp)):
                newtmp = os.path.join(self.opts.output_dir, 'tmp')
                try:
                    os.mkdir(newtmp)
                except OSError:
                    sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
                our_env['TEMP'] = newtmp
                if not self.temp_warned:
                    sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
                    self.temp_warned = True
            x = subprocess.Popen(cl, stdout=sto, stderr=sto, cwd=self.opts.output_dir, env=our_env)
            retval1 = x.wait()
        if retval1 == 0:
            # Replace the original with the compressed copy; the log is only kept on failure.
            os.unlink(inpdf)
            shutil.move(outpdf, inpdf)
            os.unlink(hlog)
        hlog = os.path.join(self.opts.output_dir, "thumbnail_%s.txt" % os.path.basename(inpdf))
        outpng = '%s.%s' % (os.path.splitext(inpdf)[0], thumbformat)
        cl2 = ['convert', inpdf, outpng]
        with open(hlog, 'w') as sto:
            x = subprocess.Popen(cl2, stdout=sto, stderr=sto, cwd=self.opts.output_dir, env=our_env)
            retval2 = x.wait()
        if retval2 == 0:
            os.unlink(hlog)
        retval = retval1 or retval2
        return retval

    def getfSize(self, fpath, outpath):
        """Format a nice file size string.

        :param fpath: file name, joined onto ``outpath``.
        :param outpath: directory containing the file.
        :returns: '' when the file does not exist, otherwise the size in
            B, KB or MB ('0 B' for an empty file).
        """
        size = ''
        fp = os.path.join(outpath, fpath)
        if os.path.isfile(fp):
            size = '0 B'
            n = float(os.path.getsize(fp))
            if n > 2**20:
                size = '%1.1f MB' % (n / 2**20)
            elif n > 2**10:
                size = '%1.1f KB' % (n / 2**10)
            elif n > 0:
                size = '%d B' % (int(n))
        return size

    def makeHtml(self):
        """Create an HTML file listing all the artifacts found in the output_dir.

        PDFs are compressed and shown as a thumbnail table, grouped by the
        ``.log`` file whose name prefix (text before the first ``_``) they
        share. The text of each log follows its section, and a table of every
        output file with its size comes last. The page is written to
        ``opts.output_html`` and kept, as a list of fragments, in ``self.html``.
        """
        galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="toolFormBody">
"""
        galhtmlpostfix = """</div></body></html>\n"""

        flist = os.listdir(self.opts.output_dir)
        flist = [x for x in flist if x != 'Rplots.pdf']
        flist.sort()
        html = []
        html.append(galhtmlprefix % progname)
        html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.scriptname, timenow()))
        fhtml = []
        if len(flist) > 0:
            logfiles = [x for x in flist if x.lower().endswith('.log')]  # log file names determine sections
            logfiles.sort()
            logfiles = [x for x in logfiles if abspath(x) != abspath(self.tlog)]
            logfiles.append(abspath(self.tlog))  # make it the last one
            pdflist = []
            for rownum, fname in enumerate(flist):
                dname, e = os.path.splitext(fname)
                sfsize = self.getfSize(fname, self.opts.output_dir)
                if e.lower() == '.pdf':  # compress and make a thumbnail
                    thumb = '%s.%s' % (dname, self.thumbformat)
                    pdff = os.path.join(self.opts.output_dir, fname)
                    retval = self.compressPDF(inpdf=pdff, thumbformat=self.thumbformat)
                    if retval == 0:
                        pdflist.append((fname, thumb))
                    else:
                        # no thumbnail available: show the PDF itself
                        pdflist.append((fname, fname))
                if (rownum + 1) % 2 == 0:
                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
                else:
                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname, fname, sfsize))
            for logfname in logfiles:  # expect at least tlog - if more
                if abspath(logfname) == abspath(self.tlog):  # handled last: takes all remaining pdfs
                    sectionname = 'All tool run'
                    if (len(logfiles) > 1):
                        sectionname = 'Other'
                    ourpdfs = pdflist
                else:
                    realname = os.path.basename(logfname)
                    sectionname = os.path.splitext(realname)[0].split('_')[0]  # break in case _ added to log
                    ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
                    pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] != sectionname]  # remove
                nacross = 1
                npdf = len(ourpdfs)

                if npdf > 0:
                    # Lay the thumbnails out on a roughly square grid.
                    nacross = math.sqrt(npdf)
                    if int(nacross)**2 != npdf:
                        nacross += 1
                    nacross = int(nacross)
                    width = min(400, int(1200 / nacross))
                    html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname)
                    html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>')
                    ntogo = nacross  # counter for table row padding with empty cells
                    html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
                    for i, paths in enumerate(ourpdfs):
                        fname, thumb = paths
                        s = ('<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d"\n'
                             ' alt="Image called %s"/></a></td>\n') % (fname, thumb, fname, width, fname)
                        if ((i + 1) % nacross == 0):
                            s += '</tr>\n'
                            ntogo = 0
                            if i < (npdf - 1):  # more to come
                                s += '<tr>'
                                ntogo = nacross
                        else:
                            ntogo -= 1
                        html.append(s)
                    if html[-1].strip().endswith('</tr>'):
                        html.append('</table></div>\n')
                    else:
                        if ntogo > 0:  # pad
                            html.append('<td>&nbsp;</td>' * ntogo)
                        html.append('</tr></table></div>\n')
                with open(logfname, 'r') as logfile:
                    logtext = [x for x in logfile if x.strip()]  # drop blank lines
                html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
                if len(logtext) > 1:
                    html.append('\n<pre>\n')
                    html += logtext
                    html.append('\n</pre>\n')
                else:
                    html.append('%s is empty<br/>' % logfname)
        if len(fhtml) > 0:
            fhtml.insert(0, '<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
            fhtml.append('</table></div><br/>')
            html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
            html += fhtml  # add all non-pdf files to the end of the display
        else:
            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
        html.append(galhtmlpostfix)
        with open(self.opts.output_html, 'w') as htmlf:
            htmlf.write('\n'.join(html))
            htmlf.write('\n')
        self.html = html

    def run(self):
        """Run the script and return the interpreter's exit status.

        bash/sh scripts are delegated to ``runBash`` when ``treatbashSpecial``
        is set. Otherwise the script text is fed to the interpreter on stdin.
        With an output directory, stdout goes to the runner log and stderr to
        the error log, and the error log is echoed to this process's stderr on
        failure. The HTML report is written when ``opts.make_HTML`` is set.
        """
        if self.treatbashSpecial and self.opts.interpreter in ['bash', 'sh']:
            return self.runBash()
        if self.opts.output_dir:
            with open(self.elog, 'w') as ste, open(self.tlog, 'w') as sto:
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                # universal_newlines gives a text-mode stdin, so the script
                # string can be written on Python 3 as well as Python 2.
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=ste,
                                     stdin=subprocess.PIPE, cwd=self.opts.output_dir,
                                     universal_newlines=True)
                p.communicate(self.script)
                retval = p.returncode
            with open(self.elog, 'r') as errlog:
                err = errlog.readlines()
            if retval != 0 and err:  # problem
                sys.stderr.write('%s\n' % (err,))  # same problem, need to capture docker stdin/stdout
        else:
            p = subprocess.Popen(self.cl, shell=False, stdin=subprocess.PIPE,
                                 universal_newlines=True)
            p.communicate(self.script)
            retval = p.returncode
        if self.opts.make_HTML:
            self.makeHtml()
        return retval

    def runBash(self):
        """Run a bash/sh script from ``self.sfile`` and return its exit status.

        Cannot use - (stdin) for bash, so the temporary script file is passed
        on the command line. With an output directory both stdout and stderr
        go to the runner log. The HTML report is written when
        ``opts.make_HTML`` is set.
        """
        if self.opts.output_dir:
            s = '## Toolfactory generated command line = %s\n' % ' '.join(self.cl)
            with open(self.tlog, 'w') as sto:
                sto.write(s)
                sto.flush()
                p = subprocess.Popen(self.cl, shell=False, stdout=sto, stderr=sto, cwd=self.opts.output_dir)
                retval = p.wait()
        else:
            p = subprocess.Popen(self.cl, shell=False)
            retval = p.wait()
        if self.opts.make_HTML:
            self.makeHtml()
        return retval
| 374 | |
| 375 | |
def change_user_id(new_uid, new_gid):
    """Give the container's 'galaxy' account the caller's uid and gid.

    To avoid issues with wrong user ids, the 'galaxy' user in the container
    gets the ids with which the script was called initially. The home
    directory is set to /var/home/galaxy before the ids are changed and to
    /home/galaxy afterwards. Exit statuses of the commands are ignored.

    :param new_uid: uid passed to ``usermod -u``.
    :param new_gid: gid passed to ``groupmod -g``.
    """
    usermod = "/usr/sbin/usermod"
    groupmod = "/usr/sbin/groupmod"
    steps = (
        [usermod, "-d", "/var/home/galaxy", "galaxy"],
        [usermod, "-u", new_uid, "galaxy"],
        [groupmod, "-g", new_gid, "galaxy"],
        [usermod, "-d", "/home/galaxy", "galaxy"],
    )
    for step in steps:
        subprocess.call(step)
| 386 | |
| 387 | |
def main():
    """Command line entry point.

    This is a Galaxy wrapper. It expects to be called by a special purpose
    tool.xml, e.g.::

        <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
        </command>

    On the first invocation (``--dockerized`` not given) the script restarts
    itself inside a docker container. Inside the container it switches to the
    uid/gid passed on the command line, runs the script and exits with the
    interpreter's status if that is non-zero.

    :raises AssertionError: if ``--bad_user`` is set or ``--script_path`` is
        not an existing file.
    """
    op = argparse.ArgumentParser()
    a = op.add_argument
    a('--docker_image', default=None)
    a('--script_path', default=None)
    a('--tool_name', default=None)
    a('--interpreter', default=None)
    a('--output_dir', default='./')
    a('--output_html', default=None)
    # Default must be a list: a 'None' string would be iterated character by
    # character when the command line is built.
    a('--input_tab', default=[], nargs='*')
    a('--output_tab', default='None')
    a('--user_email', default='Unknown')
    a('--bad_user', default=None)
    a('--make_HTML', default=None)
    a('--new_tool', default=None)
    a('--dockerized', default=0)
    a('--group_id', default=None)
    a('--user_id', default=None)
    a('--output_format', default='tabular')
    a('--input_format', dest='input_formats', action='append', default=[])
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    opts = op.parse_args()
    assert not opts.bad_user, 'UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user, opts.bad_user)
    # Test for a value first: os.path.isfile(None) raises TypeError.
    assert opts.script_path and os.path.isfile(opts.script_path), '## Tool Factory wrapper expects a script path - eg --script_path=foo.R'
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # best effort: the directory normally exists already
            pass
    if opts.dockerized == 0:
        # The default 0 is only seen when the flag is absent; inside the
        # container the flag arrives as the string '1'.
        retcode = switch_to_docker(opts)
        if retcode:
            sys.exit(retcode)  # pass the container's failure on to the job runner
        return
    change_user_id(opts.user_id, opts.group_id)
    os.setgid(int(opts.group_id))
    os.setuid(int(opts.user_id))
    r = ScriptRunner(opts)
    try:
        retcode = r.run()
    finally:
        os.unlink(r.sfile)  # remove the temporary script copy even if run() fails
    if retcode:
        sys.exit(retcode)  # indicate failure to job runner


if __name__ == "__main__":
    main()
