view scriptrunner.py @ 1:315a7e9ed6eb draft

planemo upload for repository https://github.com/mvdbeek/docker_scriptrunner/ commit 30f8264cdd67d40dec8acde6407f32152e6a29c1-dirty
author mvdbeek
date Sat, 09 Jul 2016 17:00:06 -0400
parents 21d312776891
children 495946ffc2d6
line wrap: on
line source

# DockerToolFactory.py
# see https://github.com/mvdbeek/scriptrunner

import sys 
import shutil 
import subprocess 
import os 
import time 
import tempfile 
import argparse
import getpass
import tarfile
import re
import shutil
import math
import fileinput
from os.path import abspath 


# Module-level settings.
# progname: file name this script was invoked as (last component of argv[0]).
progname = os.path.basename(sys.argv[0])
# verbose / debug: module flags, both off by default.
verbose = False
debug = False

def timenow():
    """Return the current local time as a 'DD/MM/YYYY HH:MM:SS' string."""
    now = time.localtime()
    return time.strftime('%d/%m/%Y %H:%M:%S', now)

# Characters that must be replaced before script text is embedded in generated
# markup. "&", ">" and "<" become HTML entities; "$" is backslash-escaped
# (presumably so template engines do not treat it as a variable marker -
# TODO confirm against the consumer of the escaped script).
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",
}


def html_escape(text):
    """Return *text* with every character listed in html_escape_table replaced.

    Characters not present in the table are copied through unchanged, so an
    empty string yields an empty string.
    """
    return "".join(html_escape_table.get(c, c) for c in text)

def cmd_exists(cmd):
    """Return True when the shell can resolve *cmd* (via the ``type`` builtin).

    The command's output is discarded to os.devnull rather than to pipes:
    subprocess.call never reads from a PIPE, so piping the output could block
    the child once the pipe buffer fills.

    NOTE: *cmd* is interpolated into a shell command line, so only pass
    trusted, literal command names.
    """
    with open(os.devnull, 'w') as devnull:
        status = subprocess.call("type " + cmd, shell=True,
                                 stdout=devnull, stderr=devnull)
    return status == 0

def construct_bind(host_path, container_path=False, binds=None, ro=True):
    """Build or extend the bind-mount dictionary handed to the docker-py client.

    host_path may be a single path or a list of paths. Each host path becomes
    a key whose value is ``{'bind': <container path>, 'ro': ro}``.

    container_path, when truthy, is used as the mount point for a single
    host_path, or for the first entry only when host_path is a list; every
    other entry is mounted at the same path it has on the host.

    binds is the dictionary to extend; a falsy value (None or an empty dict)
    starts a fresh dictionary. The dictionary that was filled is returned.
    """
    # TODO remove container_path if it's always going to be the same as host_path
    mounts = binds if binds else {}
    if not isinstance(host_path, list):
        mounts[host_path] = {'bind': container_path or host_path, 'ro': ro}
        return mounts
    for index, path in enumerate(host_path):
        # an explicit container path only ever applies to the first entry
        target = (container_path or path) if index == 0 else path
        mounts[path] = {'bind': target, 'ro': ro}
    return mounts

def switch_to_docker(opts):
    """Re-run this script, with the same arguments, inside a docker container.

    The script itself, the user script, the output directory and every
    input/output file named in *opts* are bind-mounted at their host paths.
    The container command is ``python -u <this script> <original args>`` plus
    ``--dockerized 1 --user_id <uid> --group_id <gid>``. The container's logs
    are printed once it has finished and the container is then removed, even
    when starting or waiting for it raised.

    Side effect: sys.argv is rewritten so that the value following
    ``--output_dir`` is an absolute path.

    Returns the container's exit status as reported by docker-py's ``wait``
    (callers that ignore the return value are unaffected).
    """
    import docker  # need local import, as container does not have docker-py
    user_id = os.getuid()
    group_id = os.getgid()
    docker_client = docker.Client()
    toolfactory_path = abspath(sys.argv[0])
    binds = construct_bind(host_path=opts.script_path, ro=False)
    binds = construct_bind(binds=binds, host_path=abspath(opts.output_dir), ro=False)
    if len(opts.input_tab) > 0:
        binds = construct_bind(binds=binds, host_path=opts.input_tab, ro=True)
    if opts.output_tab != 'None':
        binds = construct_bind(binds=binds, host_path=opts.output_tab, ro=False)
    if opts.make_HTML:
        binds = construct_bind(binds=binds, host_path=opts.output_html, ro=False)
    binds = construct_bind(binds=binds, host_path=toolfactory_path)
    volumes = list(binds)
    # Inject the absolute path of the working dir. The i > 0 guard stops the
    # first element from being compared against the *last* one (index -1).
    sys.argv = [
        abspath(opts.output_dir) if i > 0 and sys.argv[i - 1] == '--output_dir' else arg
        for i, arg in enumerate(sys.argv)
    ]
    # The script is mounted at its absolute host path, so invoke it by that
    # path rather than by a possibly relative argv[0].
    cmd = (['python', '-u', toolfactory_path] + sys.argv[1:]
           + ['--dockerized', '1', "--user_id", str(user_id), "--group_id", str(group_id)])
    # RepoTags may be None for untagged images, hence the `or []`.
    image_exists = any(
        opts.docker_image in (image.get('RepoTags') or [])
        for image in docker_client.images()
    )
    if not image_exists:
        docker_client.pull(opts.docker_image)
    container = docker_client.create_container(
        image=opts.docker_image,
        volumes=volumes,
        command=cmd
        )
    container_id = container['Id']
    try:
        docker_client.start(container=container_id, binds=binds)
        status = docker_client.wait(container=container_id)
        logs = docker_client.logs(container=container_id)
        print("".join(logs))
    finally:
        docker_client.remove_container(container_id)
    if isinstance(status, dict):
        # newer docker-py releases report {'StatusCode': n} instead of n
        status = status.get('StatusCode')
    return status

class ScriptRunner:
    """Run an arbitrary script with a chosen interpreter and collect its outputs.

    The constructor reads the script named by ``opts.script_path``, writes a
    temporary copy (``self.sfile``) and an artifact copy into the output
    directory, and assembles the interpreter command line in ``self.cl``.
    ``run`` executes that command line; when ``opts.make_HTML`` is set,
    ``makeHtml`` then writes an HTML index of everything in ``opts.output_dir``.
    """

    def __init__(self,opts=None,treatbashSpecial=True, image_tag='base'):
        """Clean up inputs and set up the command line and output locations.

        opts             -- parsed command-line options (argparse namespace);
                            must provide at least script_path, output_dir,
                            interpreter, input_tab, output_tab,
                            additional_parameters, output_format and
                            input_formats.
        treatbashSpecial -- when true and the interpreter is bash/sh, the
                            script is passed as a file instead of on stdin.
        image_tag        -- stored on the instance; not used by this class.

        Side effects: changes the working directory to opts.output_dir and
        creates the temporary script file self.sfile (the caller removes it).
        """
        self.opts = opts
        self.scriptname = 'script'
        self.useIM = cmd_exists('convert')
        self.useGS = cmd_exists('gs')
        self.temp_warned = False # we want only one warning if $TMP not set
        self.treatbashSpecial = treatbashSpecial
        self.image_tag = image_tag
        os.chdir(abspath(opts.output_dir))
        self.thumbformat = 'png'
        with open(self.opts.script_path,'r') as source:
            s = [x.rstrip() for x in source] # remove pesky dos line endings if needed
        self.script = '\n'.join(s)
        fhandle,self.sfile = tempfile.mkstemp(prefix='script',suffix=".%s" % (opts.interpreter))
        # Write through the descriptor mkstemp returned so it is closed
        # instead of leaked; self.sfile is the script source for Popen.
        with os.fdopen(fhandle,'w') as tscript:
            tscript.write(self.script)
        self.indentedScript = '\n'.join([' %s' % html_escape(x) for x in s]) # for restructured text in help
        self.escapedScript = '\n'.join([html_escape(x) for x in s])
        self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.scriptname)
        if opts.output_dir: # may not want these complexities
            self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.scriptname)
            art = '%s.%s' % (self.scriptname,opts.interpreter)
            artpath = os.path.join(self.opts.output_dir,art) # need full path
            with open(artpath,'w') as artifact: # keep a copy of the script with the outputs
                artifact.write(self.script)
        self.cl = []
        self.html = []
        a = self.cl.append
        a(opts.interpreter)
        if self.treatbashSpecial and opts.interpreter in ['bash','sh']:
            a(self.sfile)
        else:
            a('-') # stdin
        for input_path in opts.input_tab:
            a(input_path)
        if opts.output_tab == 'None': #If tool generates only HTML, set output name to toolname
            a(str(self.scriptname)+'.out')
        # NOTE(review): output_tab is appended even when it is the 'None'
        # placeholder handled just above - confirm whether that is intended.
        a(opts.output_tab)
        for param in opts.additional_parameters:
            # "name,value"; split once so the value may itself contain commas
            name, value = param.split(',', 1)
            a('--'+name)
            a(value)
        self.outFormats = opts.output_format
        self.inputFormats = list(opts.input_formats)
        self.test1Input = '%s_test1_input.xls' % self.scriptname
        self.test1Output = '%s_test1_output.xls' % self.scriptname
        self.test1HTML = '%s_test1_output.html' % self.scriptname


    def compressPDF(self,inpdf=None,thumbformat='png'):
        """Compress a PDF in place with ghostscript and make a thumbnail of it.

        inpdf       -- absolute path to the pdf (must exist).
        thumbformat -- file extension of the thumbnail written by ``convert``.

        Note that GS gets confused if neither $TMP nor $TEMP is usable, so a
        'tmp' directory under the output directory is supplied via $TEMP then.

        Each step logs to a text file in the output directory that is deleted
        again when the step succeeds. Returns 0 when both steps succeeded,
        otherwise the first non-zero exit status.
        """
        assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.__class__.__name__)
        hlog = os.path.join(self.opts.output_dir,"compress_%s.txt" % os.path.basename(inpdf))
        our_env = os.environ.copy()
        our_tmp = our_env.get('TMP') or our_env.get('TEMP')
        with open(hlog,'a') as sto:
            if not (our_tmp and os.path.exists(our_tmp)):
                newtmp = os.path.join(self.opts.output_dir,'tmp')
                try:
                    os.mkdir(newtmp)
                except OSError:
                    sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
                our_env['TEMP'] = newtmp
                if not self.temp_warned:
                    sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
                    self.temp_warned = True
            outpdf = '%s_compressed' % inpdf
            cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH","-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf,inpdf]
            sto.flush() # our warnings must land before the child's output
            x = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
            retval1 = x.wait()
        if retval1 == 0:
            os.unlink(inpdf)
            shutil.move(outpdf,inpdf)
            os.unlink(hlog)
        hlog = os.path.join(self.opts.output_dir,"thumbnail_%s.txt" % os.path.basename(inpdf))
        outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
        cl2 = ['convert', inpdf, outpng]
        with open(hlog,'w') as sto:
            x = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=self.opts.output_dir,env=our_env)
            retval2 = x.wait()
        if retval2 == 0:
            os.unlink(hlog)
        retval = retval1 or retval2
        return retval


    def getfSize(self,fpath,outpath):
        """Format a nice file size string for outpath/fpath.

        Returns '' when the path is not a regular file, '0 B' for an empty
        file, and otherwise the size in B, KB or MB.
        """
        size = ''
        fp = os.path.join(outpath,fpath)
        if os.path.isfile(fp):
            size = '0 B'
            n = float(os.path.getsize(fp))
            if n > 2**20:
                size = '%1.1f MB' % (n/2**20)
            elif n > 2**10:
                size = '%1.1f KB' % (n/2**10)
            elif n > 0:
                size = '%d B' % (int(n))
        return size

    def makeHtml(self):
        """Write an HTML page listing all the artifacts found in the output_dir.

        PDFs are compressed and thumbnailed first. Every ``*.log`` file starts
        a section: PDFs whose name (up to the first '_') matches the log's
        name are shown with it, and the runner log comes last with all
        remaining PDFs. A table of every output file with its size follows.
        The page is written to opts.output_html and kept in self.html.
        """

        galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 
        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 
        <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 
        <meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> 
        <title></title> 
        <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> 
        </head> 
        <body> 
        <div class="toolFormBody"> 
        """ 
        galhtmlattr = """<hr/><div class="infomessage">This tool (%s) was generated by the <a href="https://bitbucket.org/fubar/galaxytoolfactory/overview">Galaxy Tool Factory</a></div><br/>""" 
        galhtmlpostfix = """</div></body></html>\n"""

        flist = os.listdir(self.opts.output_dir)
        flist = [x for x in flist if x != 'Rplots.pdf']
        flist.sort()
        html = []
        html.append(galhtmlprefix % progname)
        html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.scriptname,timenow()))
        fhtml = []
        if len(flist) > 0:
            logfiles = [x for x in flist if x.lower().endswith('.log')] # log file names determine sections
            logfiles.sort()
            logfiles = [x for x in logfiles if abspath(x) != abspath(self.tlog)]
            logfiles.append(abspath(self.tlog)) # make it the last one
            pdflist = []
            for rownum,fname in enumerate(flist):
                dname,e = os.path.splitext(fname)
                sfsize = self.getfSize(fname,self.opts.output_dir)
                if e.lower() == '.pdf' : # compress and make a thumbnail
                    thumb = '%s.%s' % (dname,self.thumbformat)
                    pdff = os.path.join(self.opts.output_dir,fname)
                    retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
                    if retval == 0:
                        pdflist.append((fname,thumb))
                    else:
                        pdflist.append((fname,fname))
                if (rownum+1) % 2 == 0:
                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
                else:
                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
            for logfname in logfiles: # expect at least tlog - if more
                if abspath(logfname) == abspath(self.tlog): # handled later
                    sectionname = 'All tool run'
                    if (len(logfiles) > 1):
                        sectionname = 'Other'
                    ourpdfs = pdflist
                else:
                    realname = os.path.basename(logfname)
                    sectionname = os.path.splitext(realname)[0].split('_')[0] # break in case _ added to log
                    ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
                    pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] != sectionname] # remove
                nacross = 1
                npdf = len(ourpdfs)

                if npdf > 0:
                    # smallest square grid that holds all thumbnails
                    nacross = math.sqrt(npdf) ## int(round(math.log(npdf,2)))
                    if int(nacross)**2 != npdf:
                        nacross += 1
                    nacross = int(nacross)
                    width = min(400,int(1200/nacross))
                    html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname)
                    html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>')
                    ntogo = nacross # counter for table row padding with empty cells
                    html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
                    for i,paths in enumerate(ourpdfs):
                        fname,thumb = paths
                        s= """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d" 
                           alt="Image called %s"/></a></td>\n""" % (fname,thumb,fname,width,fname)
                        if ((i+1) % nacross == 0):
                            s += '</tr>\n'
                            ntogo = 0
                            if i < (npdf - 1): # more to come
                                s += '<tr>'
                                ntogo = nacross
                        else:
                            ntogo -= 1
                        html.append(s)
                    if html[-1].strip().endswith('</tr>'):
                        html.append('</table></div>\n')
                    else:
                        if ntogo > 0: # pad
                            html.append('<td>&nbsp;</td>'*ntogo)
                        html.append('</tr></table></div>\n')
                with open(logfname,'r') as logfh:
                    logtext = [x for x in logfh if x.strip()] # drop blank lines
                html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
                if len(logtext) > 1:
                    html.append('\n<pre>\n')
                    html += logtext
                    html.append('\n</pre>\n')
                else:
                    html.append('%s is empty<br/>' % logfname)
        if len(fhtml) > 0:
            fhtml.insert(0,'<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
            fhtml.append('</table></div><br/>')
            html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
            html += fhtml # add all non-pdf files to the end of the display
        else:
            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
        html.append(galhtmlpostfix)
        with open(self.opts.output_html,'w') as htmlf:
            htmlf.write('\n'.join(html))
            htmlf.write('\n')
        self.html = html


    def run(self):
        """Execute the command line and return the interpreter's exit status.

        bash/sh scripts are delegated to runBash; every other interpreter
        receives the script on stdin (scripts must be small enough not to
        fill the pipe!). With an output directory, stdout goes to the runner
        log and stderr to the error log; on failure the error log is echoed
        to this process's stderr. The HTML page is written afterwards when
        opts.make_HTML is set.
        """
        if self.treatbashSpecial and self.opts.interpreter in ['bash','sh']:
            retval = self.runBash()
        else:
            if self.opts.output_dir:
                ste = open(self.elog,'w')
                sto = open(self.tlog,'w')
                sto.write('## Toolfactory generated command line = %s\n' % ' '.join(self.cl))
                sto.flush()
                p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=ste,stdin=subprocess.PIPE,cwd=self.opts.output_dir)
            else:
                p = subprocess.Popen(self.cl,shell=False,stdin=subprocess.PIPE)
            p.stdin.write(self.script)
            p.stdin.close()
            retval = p.wait()
            if self.opts.output_dir:
                sto.close()
                ste.close()
                with open(self.elog,'r') as errfh:
                    err = errfh.readlines()
                if retval != 0 and err: # problem
                    sys.stderr.write('%s\n' % (err,)) #same problem, need to capture docker stdin/stdout
            if self.opts.make_HTML:
                self.makeHtml()
        return retval

    def runBash(self):
        """Run a bash/sh script from the temporary file self.sfile.

        '-' (stdin) cannot be used for bash, so the command line built in
        __init__ names self.sfile instead. With an output directory, stdout
        and stderr both go to the runner log. The HTML page is written
        afterwards when opts.make_HTML is set. Returns the exit status.
        """
        if self.opts.output_dir:
            s = '## Toolfactory generated command line = %s\n' % ' '.join(self.cl)
            sto = open(self.tlog,'w')
            sto.write(s)
            sto.flush()
            p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=sto,cwd=self.opts.output_dir)
        else:
            p = subprocess.Popen(self.cl,shell=False)
        retval = p.wait()
        if self.opts.output_dir:
            sto.close()
        if self.opts.make_HTML:
            self.makeHtml()
        return retval
  

def change_user_id(new_uid, new_gid):
    """Give the container's 'galaxy' account the caller's uid and gid.

    To avoid issues with wrong user ids, the 'galaxy' user inside the
    container is changed to the ids the script was originally called with.
    Four commands run in order: set the home directory to /var/home/galaxy,
    change the uid, change the gid, then set the home directory to
    /home/galaxy. Their exit codes are not checked.

    new_uid, new_gid -- ids passed straight to usermod/groupmod as arguments.
    """
    usermod = "/usr/sbin/usermod"
    groupmod = "/usr/sbin/groupmod"
    steps = (
        [usermod, "-d", "/var/home/galaxy", "galaxy"],
        [usermod, "-u", new_uid, "galaxy"],
        [groupmod, "-g", new_gid, "galaxy"],
        [usermod, "-d", "/home/galaxy", "galaxy"],
    )
    for step in steps:
        subprocess.call(step)


def main():
    """Parse the command line and run the script, on the host or in docker.

    Without ``--dockerized`` (the host-side call) the work is handed to
    switch_to_docker, which re-invokes this script inside a container with
    ``--dockerized 1 --user_id ... --group_id ...``. Inside the container the
    process switches to the given uid/gid, runs the script through
    ScriptRunner and removes the temporary script file afterwards.

    Exits with a message when ``--bad_user`` is set or ``--script_path`` is
    not an existing file, and with the script's exit status when that is
    non-zero (to indicate failure to the job runner).
    """
    u = """
    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
    </command>
    """
    op = argparse.ArgumentParser(description=u)
    a = op.add_argument
    a('--docker_image',default=None)
    a('--script_path',default=None)
    a('--tool_name',default=None)
    a('--interpreter',default=None)
    a('--output_dir',default='./')
    a('--output_html',default=None)
    # An empty list, not the string 'None': the value is iterated as a list
    # of paths, and a string would be walked character by character.
    a('--input_tab',default=[], nargs='*')
    a('--output_tab',default='None')
    a('--user_email',default='Unknown')
    a('--bad_user',default=None)
    a('--make_HTML',default=None)
    a('--new_tool',default=None)
    # Parsed as int so that "--dockerized 0" really means "not dockerized".
    a('--dockerized',default=0, type=int)
    a('--group_id',default=None)
    a('--user_id',default=None)
    a('--output_format', default='tabular')
    a('--input_format', dest='input_formats', action='append', default=[])
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    opts = op.parse_args()
    # Explicit checks instead of assert: assertions vanish under "python -O",
    # which would silently disable the authorisation check.
    if opts.bad_user:
        sys.exit('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user,opts.bad_user))
    if not (opts.script_path and os.path.isfile(opts.script_path)):
        sys.exit('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # an already existing directory is fine; anything else is not
            if not os.path.isdir(opts.output_dir):
                raise
    if not opts.dockerized:
        # A non-zero return value is treated as the container's exit status;
        # None or 0 counts as success.
        status = switch_to_docker(opts)
        if status:
            sys.exit(status)
        return
    change_user_id(opts.user_id, opts.group_id)
    os.setgid(int(opts.group_id))
    os.setuid(int(opts.user_id))
    r = ScriptRunner(opts)
    try:
        retcode = r.run()
    finally:
        os.unlink(r.sfile) # remove the temporary script even if run() raised
    if retcode:
        sys.exit(retcode) # indicate failure to job runner

# Run the wrapper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()