view rgToolFactory2.py @ 22:4e3aa95ed3ac draft

Uploaded
author fubar
date Mon, 02 Mar 2015 05:14:05 -0500
parents 71b85c322600
children
line wrap: on
line source

# rgToolFactoryMultIn.py
# see https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
# 
# copyright ross lazarus (ross stop lazarus at gmail stop com) May 2012
# 
# all rights reserved
# Licensed under the LGPL
# suggestions for improvement and bug fixes welcome at https://bitbucket.org/fubar/galaxytoolfactory/wiki/Home
#
# January 2015
# unified all setups by passing the script on the cl rather than via a PIPE - no need for treat_bash_special so removed
#
# in the process of building a complex tool
# added ability to choose one of the current toolshed package_r or package_perl or package_python dependencies and source that package
# add that package to tool_dependencies
# Note that once the generated tool is loaded, it will have that package's env.sh loaded automagically so there is no
# --envshpath in the parameters for the generated tool and it uses the system one which will be first on the adjusted path.
#
# sept 2014 added additional params from
# https://bitbucket.org/mvdbeek/dockertoolfactory/src/d4863bcf7b521532c7e8c61b6333840ba5393f73/DockerToolFactory.py?at=default
# passing them is complex
# and they are restricted to NOT contain commas or double quotes to ensure that they can be safely passed together on 
# the toolfactory command line as a comma delimited double quoted string for parsing and passing to the script
# see examples on this tool form

# august 2014

# Allows arbitrary number of input files
# NOTE positional parameters are now passed to script
# and output (may be "None") is *before* arbitrary number of inputs
#
# march 2014
# had to remove dependencies because cross toolshed dependencies are not possible - can't pre-specify a toolshed url for graphicsmagick and ghostscript
# grrrrr - night before a demo
# added dependencies to a tool_dependencies.xml if html page generated so generated tool is properly portable
#
# added ghostscript and graphicsmagick as dependencies 
# fixed a weird problem where gs was trying to use the new_files_path from universe (database/tmp) as ./database/tmp
# errors ensued
#
# august 2013
# found a problem with GS if $TMP or $TEMP missing - now inject /tmp and warn
#
# july 2013
# added ability to combine images and individual log files into html output
# just make sure there's a log file foo.log and it will be output
# together with all images named like "foo_*.pdf"
# otherwise old format for html
#
# January 2013
# problem pointed out by Carlos Borroto
# added escaping for <>$ - thought I did that ages ago...
#
# August 11 2012 
# changed to use shell=False and cl as a sequence

# This is a Galaxy tool factory for simple scripts in python, R or whatever ails ye.
# It also serves as the wrapper for the new tool.
# 
# you paste and run your script
# Only works for simple scripts that read one input from the history.
# Optionally can write one new history dataset,
# and optionally collect any number of outputs into links on an autogenerated HTML page.

# DO NOT install on a public or important site - please.

# installed generated tools are fine if the script is safe.
# They just run normally and their user cannot do anything unusually insecure
# but please, practice safe toolshed.
# Read the fucking code before you install any tool 
# especially this one

# After you get the script working on some test data, you can
# optionally generate a toolshed compatible gzip file
# containing your script safely wrapped as an ordinary Galaxy script in your local toolshed for
# safe and largely automated installation in a production Galaxy.

# If you opt for an HTML output, you get all the script outputs arranged
# as a single Html history item - all output files are linked, thumbnails for all the pdfs.
# Ugly but really inexpensive.
# 
# Patches appreciated please. 
#
#
# long route to June 2012 product
# Behold the awesome power of Galaxy and the toolshed with the tool factory to bind them
# derived from an integrated script model  
# called rgBaseScriptWrapper.py
# Note to the unwary:
#   This tool allows arbitrary scripting on your Galaxy as the Galaxy user
#   There is nothing stopping a malicious user doing whatever they choose
#   Extremely dangerous!!
#   Totally insecure. So, trusted users only
#
# preferred model is a developer using their throw away workstation instance - ie a private site.
# no real risk. The universe_wsgi.ini admin_users string is checked - only admin users are permitted to run this tool.
#

import sys 
import shutil 
import subprocess 
import os 
import time 
import tempfile 
import optparse
import tarfile
import re
import shutil
import math

# file name of this script as it was invoked, without any directory part
progname = os.path.basename(sys.argv[0])
myversion = 'V001.1 March 2014'
# module level flags, both off by default
verbose = False
debug = False
# project home page, quoted in the help text of generated tools
toolFactoryURL = 'https://bitbucket.org/fubar/galaxytoolfactory'

# if we do html we need these dependencies specified in a tool_dependencies.xml file and referred to in the generated
# tool xml

def timenow():
    """Return the current local time as a 'dd/mm/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    now = time.localtime()  # no argument means "the current time"
    return time.strftime(stamp_format, now)

def quote_non_numeric(s):
    """Return s unchanged when float() accepts it, else s wrapped in double quotes.

    Used when building name=value arguments for Rscript, perl and shell
    scripts so that non-numeric values arrive pre-quoted.
    """
    try:
        float(s)  # only the success or failure of the conversion matters
    except ValueError:
        return '"%s"' % s
    return s

# Per-character substitutions applied by html_escape.
# '$' has no HTML entity; it is replaced by backslash + '$'. The backslash is
# written as '\\' because '\$' is not a valid Python escape sequence.
html_escape_table = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    "$": "\\$",
}

def html_escape(text):
    """Return text with every character listed in html_escape_table replaced.

    Characters that have no entry in the table are copied through unchanged.
    """
    pieces = []
    for ch in text:
        pieces.append(html_escape_table.get(ch, ch))
    return "".join(pieces)


def html_unescape(text):
    """Reverse the substitutions made by html_escape.

    '&gt;', '&lt;' and backslash-dollar are turned back into '>', '<' and '$',
    then '&amp;' is turned back into '&'.
    """
    # '&amp;' has to be decoded last: decoding it first would turn an escaped
    # literal such as '&amp;lt;' into '&lt;' and then wrongly into '<'.
    t = text.replace('&gt;', '>').replace('&lt;', '<').replace('\\$', '$')
    return t.replace('&amp;', '&')
     
def cmd_exists(cmd):
    """Return True when the shell's `type` builtin can resolve cmd, else False.

    cmd is handed to the shell as a positional argument rather than being
    pasted into the command string, so it is never interpreted as shell code.
    """
    # Output is discarded to the null device: subprocess.call never reads a
    # PIPE, so a child that fills the pipe buffer would block forever.
    with open(os.devnull, 'w') as devnull:
        # with shell=True and a sequence, items after the first become the
        # shell's own arguments: 'sh' is $0 and cmd is $1
        status = subprocess.call(['type "$1"', 'sh', cmd], shell=True,
                                 stdout=devnull, stderr=devnull)
    return status == 0

def parse_citations(citations_text):
    """Split citation text into a list of (type, content) tuples.

    Entries are separated by the marker "**ENTRY**". An entry that starts with
    "doi" yields ("doi", rest); any other entry yields ("bibtex", rest), where
    a leading "bibtex" tag is removed if present. Whitespace around entries
    and around their content is stripped; blank entries are skipped.
    """
    citation_tuples = []
    for entry in citations_text.split("**ENTRY**"):
        # strip first so whitespace before the tag cannot hide it
        entry = entry.strip()
        if not entry:
            continue
        if entry.startswith("doi"):
            citation_tuples.append(("doi", entry[len("doi"):].strip()))
        elif entry.startswith("bibtex"):
            citation_tuples.append(("bibtex", entry[len("bibtex"):].strip()))
        else:
            # untagged entry: treat as bibtex but keep all of its text
            citation_tuples.append(("bibtex", entry))
    return citation_tuples

def shell_source(script):
    """Source a shell script and copy the resulting environment into os.environ.

    Needed to source a Galaxy tool interpreter env.sh so the right dependency
    package is picked up. Based on the idea in
    http://pythonwise.blogspot.fr/2010/04/sourcing-shell-script.html
    The environment is dumped with `env -0` so entries are separated by NUL
    characters, which copes with values that contain newlines.
    """
    # script is passed as the shell's $1 instead of being pasted into the
    # command string, so paths containing spaces or shell characters are safe.
    # NOTE(review): the leading `env -i` has no command to run, so it presumably
    # does not clear the environment of the sourcing shell - confirm the intent.
    pipe = subprocess.Popen(['env -i ; . "$1" ; env -0', 'sh', script],
                            stdout=subprocess.PIPE, shell=True)
    output = pipe.communicate()[0]
    if not isinstance(output, str):
        # Python 3 returns bytes from the pipe; Python 2 already gives str
        output = os.fsdecode(output)
    # Split each entry on its first '=' only, so values that themselves
    # contain '=' are kept; entries without any '=' are ignored.
    newenv = dict(entry.split("=", 1) for entry in output.split('\0') if "=" in entry)
    os.environ.update(newenv)
    
class ScriptRunner:
    """class is a wrapper for an arbitrary script
    note funky templating. this should all be done proper.
    Problem is, this kludge developed quite naturally and seems to work ok with
    little overhead...
    
    """


    def __init__(self,opts=None):
        """
        cleanup inputs, setup some outputs
        
        """
        
        self.toolhtmldepinterpskel = """<?xml version="1.0"?>
        <tool_dependency>
            <package name="ghostscript" version="9.10">
                <repository name="package_ghostscript_9_10" owner="devteam" prior_installation_required="True" />
            </package>
            <package name="graphicsmagick" version="1.3.18">
                <repository name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" />
            </package>
             <package name="%(interpreter_name)s" version="%(interpreter_version)s">
                <repository name="%(interpreter_pack)s" owner="%(interpreter_owner)s" prior_installation_required="True" />
            </package>
           
                <readme>
                   %(readme)s
                   This file was autogenerated by the Galaxy Tool Factory 2
               </readme>
        </tool_dependency>
        """
        
        self.toolhtmldepskel = """<?xml version="1.0"?>
        <tool_dependency>
            <package name="ghostscript" version="9.10">
                <repository name="package_ghostscript_9_10" owner="devteam" prior_installation_required="True" />
            </package>
            <package name="graphicsmagick" version="1.3.18">
                <repository name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" />
            </package>
                <readme>
                   %(readme)s
                   This file was autogenerated by the Galaxy Tool Factory 2
               </readme>
        </tool_dependency>
        """

        self.emptytoolhtmldepskel = """<?xml version="1.0"?>
        <tool_dependency>
                <readme>
                   %(readme)s
                This file was autogenerated by the Galaxy Tool Factory 2
               </readme>
        </tool_dependency>
        """

        self.protorequirements = """<requirements>
              <requirement type="package" version="9.10">ghostscript</requirement>
              <requirement type="package" version="1.3.18">graphicsmagick</requirement>
          </requirements>"""
          
        self.protorequirements_interpreter = """<requirements>
              <requirement type="package" version="9.10">ghostscript</requirement>
              <requirement type="package" version="1.3.18">graphicsmagick</requirement>
              <requirement type="package" version="%(interpreter_version)s">%(interpreter_name)s</requirement>
          </requirements>"""
          

        self.newCommand="""
            %(toolname)s.py --script_path "$runMe" --interpreter "%(interpreter)s" 
                --tool_name "%(toolname)s"
                %(command_inputs)s
                %(command_outputs)s
            """
    
        self.tooltestsTabOnly = """
            <test>
            %(test1Inputs)s
            <param name="job_name" value="test1"/>
            <param name="runMe" value="$runMe"/>
            <output name="output1="%(test1Output)s" ftype="tabular"/>
            %(additionalParams)s
            </test>
            """
            
        self.tooltestsHTMLOnly = """
            <test>
            %(test1Inputs)s
            <param name="job_name" value="test1"/>
            <param name="runMe" value="$runMe"/>
            %(additionalParams)s
            <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="5"/>
            </test>
            """
            
        self.tooltestsBoth = """
            <test>
            %(test1Inputs)s
            <param name="job_name" value="test1"/>
            <param name="runMe" value="$runMe"/>
            %(additionalParams)s
            <output name="output1" file="%(test1Output)s" ftype="tabular" />
            <output name="html_file" file="%(test1HTML)s" ftype="html" lines_diff="10"/>
            </test>
            """

        self.newXML="""<tool id="%(toolid)s" name="%(toolname)s" version="%(tool_version)s">
%(tooldesc)s
%(requirements)s
<command interpreter="python">
%(command)s
</command>
<inputs>
%(inputs)s
%(additionalInputs)s
</inputs>
<outputs>
%(outputs)s
</outputs>
<configfiles>
<configfile name="runMe">
%(script)s
</configfile>
</configfiles>
<tests>
%(tooltests)s
</tests>
<help>

%(help)s

This tool was autogenerated from a user provided script using the Galaxy Tool Factory 2
https://toolshed.g2.bx.psu.edu/view/fubar/tool_factory_2
</help>
<citations>
    %(citations)s
    <citation type="doi">10.1093/bioinformatics/bts573</citation>
</citations>
</tool>"""
            
        self.useGM = cmd_exists('gm')
        self.useIM = cmd_exists('convert')
        self.useGS = cmd_exists('gs')
        self.temp_warned = False # we want only one warning if $TMP not set
        if opts.output_dir: # simplify for the tool tarball
            os.chdir(opts.output_dir)
        self.thumbformat = 'png'
        self.opts = opts
        self.toolname = re.sub('[^a-zA-Z0-9_]+', '', opts.tool_name) # a sanitizer now does this but..
        self.toolid = self.toolname
        self.myname = sys.argv[0] # get our name because we write ourselves out as a tool later
        self.pyfile = self.myname # crude but efficient - the cruft won't hurt much
        self.xmlfile = '%s.xml' % self.toolname
        rx = open(self.opts.script_path,'r').readlines()
        rx = [x.rstrip() for x in rx] # remove pesky dos line endings if needed
        self.script = '\n'.join(rx)
        fhandle,self.sfile = tempfile.mkstemp(prefix=self.toolname,suffix=".%s" % (opts.interpreter))
        tscript = open(self.sfile,'w') # use self.sfile as script source for Popen
        tscript.write(self.script)
        tscript.close()
        self.indentedScript = "  %s" % '\n'.join([' %s' % html_escape(x) for x in rx]) # for restructured text in help
        self.escapedScript = "%s" % '\n'.join([' %s' % html_escape(x) for x in rx])
        self.elog = os.path.join(self.opts.output_dir,"%s_error.log" % self.toolname)
        if opts.output_dir: # may not want these complexities 
            self.tlog = os.path.join(self.opts.output_dir,"%s_runner.log" % self.toolname)
            art = '%s.%s' % (self.toolname,opts.interpreter)
            artpath = os.path.join(self.opts.output_dir,art) # need full path
            artifact = open(artpath,'w') # use self.sfile as script source for Popen
            artifact.write(self.script)
            artifact.close()
        self.cl = []
        self.html = []
        self.test1Inputs = [] # now a list
        a = self.cl.append
        a(opts.interpreter)
        a(self.sfile)
        # if multiple inputs - positional or need to distinguish them with cl params
        if opts.input_tab:
            tests = []
            for i,intab in enumerate(opts.input_tab): # if multiple, make tests
                if intab.find(',') <> -1:
                    (gpath,uname) = intab.split(',')
                else:
                    gpath = uname = intab
                tests.append(os.path.basename(gpath))
            self.test1Inputs =  '<param name="input_tab" value="%s" />' % (','.join(tests))
        else:
            self.test1Inputs = ''
        # we always pass path,name pairs in using python optparse append
        # but the command line has to be different
        self.infile_paths = ''
        self.infile_names = ''
        if self.opts.input_tab:
            self.infile_paths = ','.join([x.split(',')[0].strip() for x in self.opts.input_tab])
            self.infile_names = ','.join([x.split(',')[1].strip() for x in self.opts.input_tab])
        if self.opts.interpreter == 'python':
            # yes, this is how additional parameters are always passed in python - to the TF itself and to
            # scripts to avoid having unknown parameter names (yes, they can be parsed but...) on the command line
            if self.opts.input_tab:
                a('--INPATHS "%s"' % (self.infile_paths)) 
                a('--INNAMES "%s"' % (self.infile_names)) 
            if self.opts.output_tab:
                a('--OUTPATH "%s"' % self.opts.output_tab) 
            for p in opts.additional_parameters:
                p = p.replace('"','')
                psplit = p.split(',')
                param = html_unescape(psplit[0])
                value = html_unescape(psplit[1])
                a('%s="%s"' % (param,value))
        if (self.opts.interpreter == 'Rscript'):
            # pass params on command line as expressions which the script evaluates - see sample
            if self.opts.input_tab:
                a('INPATHS="%s"' % self.infile_paths)
                a('INNAMES="%s"' % self.infile_names)
            if self.opts.output_tab:
                a('OUTPATH="%s"' % self.opts.output_tab) 
            for p in opts.additional_parameters:
                p = p.replace('"','')
                psplit = p.split(',')
                param = html_unescape(psplit[0])
                value = html_unescape(psplit[1])
                a('%s=%s' % (param,quote_non_numeric(value)))
        if (self.opts.interpreter == 'perl'):
            # pass positional params on command line - perl script needs to discombobulate the path/name lists
            if self.opts.input_tab:
                a('%s' % self.infile_paths)
                a('%s' % self.infile_names)
            if self.opts.output_tab:
                a('%s' % self.opts.output_tab)
            for p in opts.additional_parameters:
                # followed by any additional name=value parameter pairs
                p = p.replace('"','')
                psplit = p.split(',')
                param = html_unescape(psplit[0])
                value = html_unescape(psplit[1])
                a('%s=%s' % (param,quote_non_numeric(value)))
        if self.opts.interpreter == 'sh' or self.opts.interpreter == 'bash':
              # more is better - now move all params into environment AND drop on to command line.
              self.cl.insert(0,'env')
              if self.opts.input_tab:
                  self.cl.insert(1,'INPATHS=%s' % (self.infile_paths))
                  self.cl.insert(2,'INNAMES=%s' % (self.infile_names))
              if self.opts.output_tab:
                  self.cl.insert(3,'OUTPATH=%s' % (self.opts.output_tab))
                  a('OUTPATH=%s' % (self.opts.output_tab))
              # sets those environment variables for the script
              # additional params appear in CL - yes, it's confusing
              for i,p in enumerate(opts.additional_parameters):
                  psplit = p.split(',')
                  param = html_unescape(psplit[0])
                  value = html_unescape(psplit[1])
                  a('%s=%s' % (param,quote_non_numeric(value)))
                  self.cl.insert(4+i,'%s=%s' % (param,quote_non_numeric(value)))
        self.interpreter_owner = 'SYSTEM'
        self.interpreter_pack = 'SYSTEM'
        self.interpreter_name = 'SYSTEM'
        self.interpreter_version = 'SYSTEM'
        self.interpreter_revision = 'SYSTEM'
        if opts.envshpath <> 'system': # need to parse out details for our tool_dependency
            try: # fragile - depends on common naming convention as at jan 2015 = package_[interp]_v0_v1_v2... = version v0.v1.v2.. is in play
                # this ONLY happens at tool generation by an admin - the generated tool always uses the default of system so path is from local env.sh
                packdetails = opts.envshpath.split(os.path.sep)[-4:-1]  # eg ['fubar', 'package_r_3_1_1', '63cdb9b2234c']
                self.interpreter_owner = packdetails[0]
                self.interpreter_pack = packdetails[1]
                self.interpreter_name = packdetails[1].split('_')[1].upper()
                self.interpreter_revision = packdetails[2]
                self.interpreter_version =  '.'.join(packdetails[1].split('_')[2:])
            except:
                pass
        self.outFormats = opts.output_format
        self.inputFormats = opts.input_formats
        self.test1Output = '%s_test1_output.xls' % self.toolname
        self.test1HTML = '%s_test1_output.html' % self.toolname

    def makeXML(self):
        """
        Create a Galaxy xml tool wrapper for the new script as a string to write out
        fixme - use templating or something less fugly than this example of what we produce

        <tool id="reverse" name="reverse" version="0.01">
            <description>a tabular file</description>
            <command interpreter="python">
            reverse.py --script_path "$runMe" --interpreter "python" 
            --tool_name "reverse" --input_tab "$input1" --output_tab "$output1" 
            </command>
            <inputs>
            <param name="input1"  type="data" format="tabular" label="Select one or more input files from your history"/>
            <param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="reverse"/>
            </inputs>
            <outputs>
            <data format="tabular" name="output1q" label="${job_name}"/>

            </outputs>
            <help>
            
**What it Does**

Reverse the columns in a tabular file

            </help>
            <configfiles>
            <configfile name="runMe">
            
# reverse order of columns in a tabular file
import sys
inp = sys.argv[1]
outp = sys.argv[2]
i = open(inp,'r')
o = open(outp,'w')
for row in i:
     rs = row.rstrip().split('\t')
     rs.reverse()
     o.write('\t'.join(rs))
     o.write('\n')
i.close()
o.close()
 

            </configfile>
            </configfiles>
            </tool>
        
        """ 

        # these templates need a dict with the right keys to match the parameters - outputs, help, code...

        xdict = {}
        xdict['additionalParams'] = ''
        xdict['additionalInputs'] = ''
        if self.opts.additional_parameters:
            if self.opts.edit_additional_parameters: # add to new tool form with default value set to original value
                xdict['additionalInputs'] = '\n'.join(['<param name="%s" value="%s" label="%s" help="%s" type="%s"/>' % \
                (x.split(',')[0],html_escape(x.split(',')[1]),html_escape(x.split(',')[2]),html_escape(x.split(',')[3]), x.split(',')[4]) for x in self.opts.additional_parameters])
        xdict['additionalParams'] = '\n'.join(['<param name="%s" value="%s" />' % (x.split(',')[0],html_escape(x.split(',')[1])) for x in self.opts.additional_parameters])
        xdict['interpreter_owner'] = self.interpreter_owner
        xdict['interpreter_version'] = self.interpreter_version
        xdict['interpreter_pack'] = self.interpreter_pack
        xdict['interpreter_name'] = self.interpreter_name
        xdict['requirements'] = ''
        if self.opts.include_dependencies == "yes":
            if self.opts.envshpath <> 'system':
                xdict['requirements'] = self.protorequirements_interpreter % xdict       
            else:    
                xdict['requirements'] = self.protorequirements
        xdict['tool_version'] = self.opts.tool_version
        xdict['test1HTML'] = self.test1HTML
        xdict['test1Output'] = self.test1Output
        xdict['test1Inputs'] = self.test1Inputs
        if self.opts.make_HTML and self.opts.output_tab:
            xdict['tooltests'] = self.tooltestsBoth % xdict
        elif self.opts.make_HTML:
            xdict['tooltests'] = self.tooltestsHTMLOnly % xdict
        else:
            xdict['tooltests'] = self.tooltestsTabOnly % xdict
        xdict['script'] = self.escapedScript 
        # configfile is least painful way to embed script to avoid external dependencies
        # but requires escaping of <, > and $ to avoid Mako parsing
        if self.opts.help_text:
            helptext = open(self.opts.help_text,'r').readlines()
            helptext = [html_escape(x) for x in helptext] # must html escape here too - thanks to Marius van den Beek
            xdict['help'] = ''.join([x for x in helptext])
        else:
            xdict['help'] = 'Please ask the tool author (%s) for help as none was supplied at tool generation\n' % (self.opts.user_email)
        coda = ['**Script**','Pressing execute will run the following code over your input file and generate some outputs in your history::']
        coda.append('\n')
        coda.append(self.indentedScript)
        coda.append('\n**Attribution**\nThis Galaxy tool was created by %s at %s\nusing the Galaxy Tool Factory.\n' % (self.opts.user_email,timenow()))
        coda.append('See %s for details of that project' % (toolFactoryURL))
        coda.append('Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team. ')
        coda.append('Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573\n')
        xdict['help'] = '%s\n%s' % (xdict['help'],'\n'.join(coda))
        if self.opts.tool_desc:
            xdict['tooldesc'] = '<description>%s</description>' % self.opts.tool_desc
        else:
            xdict['tooldesc'] = ''
        xdict['command_outputs'] = '' 
        xdict['outputs'] = '' 
        if self.opts.input_tab:
            cins = ['\n',]
            cins.append('--input_formats %s' % self.opts.input_formats)
            cins.append('#for intab in $input1:')
            cins.append('--input_tab "${intab},${intab.name}"')
            cins.append('#end for\n')
            xdict['command_inputs'] = '\n'.join(cins)
            xdict['inputs'] = '''<param name="input_tab" multiple="true"  type="data" format="%s" label="Select one or more %s input files from your history"
                    help="Multiple inputs may be selected assuming the script can deal with them..."/> \n''' % (self.inputFormats,self.inputFormats)
        else:
            xdict['command_inputs'] = '' # assume no input - eg a random data generator       
            xdict['inputs'] = ''
        if (len(self.opts.additional_parameters) > 0):
            cins = ['\n',]
            for params in self.opts.additional_parameters:
                    psplit = params.split(',') # name,value...
                    psplit[3] = html_escape(psplit[3])
                    if self.opts.edit_additional_parameters:
                        psplit[1] = '$%s' % psplit[0] # replace with form value
                    else:
                        psplit[1] = html_escape(psplit[1]) # leave prespecified value
                    cins.append('--additional_parameters """%s"""' % ','.join(psplit)) 
            xdict['command_inputs'] = '%s\n%s' % (xdict['command_inputs'],'\n'.join(cins))
        xdict['inputs'] += '<param name="job_name" type="text" size="60" label="Supply a name for the outputs to remind you what they contain" value="%s"/> \n' % self.toolname
        xdict['toolname'] = self.toolname
        xdict['toolid'] = self.toolid
        xdict['interpreter'] = self.opts.interpreter
        xdict['scriptname'] = self.sfile
        if self.opts.make_HTML:
            xdict['command_outputs'] += ' --output_dir "$html_file.files_path" --output_html "$html_file" --make_HTML "yes"'
            xdict['outputs'] +=  ' <data format="html" name="html_file" label="${job_name}.html"/>\n'
        else:
            xdict['command_outputs'] += ' --output_dir "./"' 
        if self.opts.output_tab:
            xdict['command_outputs'] += ' --output_tab "$output1"'
            xdict['outputs'] += ' <data format="%s" name="output1" label="${job_name}"/>\n' % self.outFormats
        xdict['command'] = self.newCommand % xdict
        if self.opts.citations:
            citationstext = open(self.opts.citations,'r').read()
            citation_tuples = parse_citations(citationstext)
            citations_xml = ""
            for citation_type, citation_content in citation_tuples:
                citation_xml = """<citation type="%s">%s</citation>""" % (citation_type, html_escape(citation_content))
                citations_xml += citation_xml
            xdict['citations'] = citations_xml
        else:
            xdict['citations'] = ""
        xmls = self.newXML % xdict
        xf = open(self.xmlfile,'w')
        xf.write(xmls)
        xf.write('\n')
        xf.close()
        # ready for the tarball


    def makeTooltar(self):
        """
        Run the script, then package the new tool as a gzipped tarball.

        a tool is a gz tarball with eg
        /toolname/tool.xml /toolname/tool.py /toolname/test-data/test1_in.foo ...

        The script is run first via self.run(); a non-zero result prints a
        message to stderr and exits with status 1. Otherwise a directory named
        after the tool is filled with the generated xml, tool_dependencies.xml,
        an annotated copy of this wrapper and the test data, archived as
        <toolname>.tar.gz, copied to opts.new_tool and then removed.
        Returns the value returned by self.run().
        """
        retval = self.run()
        if retval:
            sys.stderr.write('## Run failed. Cannot build yet. Please fix and retry\n')
            sys.exit(1)
        tdir = self.toolname
        os.mkdir(tdir)
        self.makeXML()
        if self.opts.help_text:
            with open(self.opts.help_text,'r') as helpf:
                hlp = helpf.read()
        else:
            hlp = 'Please ask the tool author for help as none was supplied at tool generation\n'
        readme_dict = {'readme':hlp,'interpreter':self.opts.interpreter,'interpreter_version':self.interpreter_version,'interpreter_name':self.interpreter_name,
        'interpreter_owner':self.interpreter_owner,'interpreter_pack':self.interpreter_pack}
        if self.opts.include_dependencies == "yes":
            if self.opts.envshpath == 'system':
                tooldepcontent = self.toolhtmldepskel % readme_dict
            else:
                tooldepcontent = self.toolhtmldepinterpskel % readme_dict
        else:
            tooldepcontent = self.emptytoolhtmldepskel  % readme_dict
        with open(os.path.join(tdir,'tool_dependencies.xml'),'w') as depf:
            depf.write(tooldepcontent)
            depf.write('\n')
        testdir = os.path.join(tdir,'test-data')
        os.mkdir(testdir) # make tests directory
        # a tool may have no inputs at all - eg a random data generator
        for intab in (self.opts.input_tab or []):
            si = intab.split(',')[0] # path part of "path,name"
            dest = os.path.join(testdir,os.path.basename(si))
            if si != dest:
                shutil.copyfile(si,dest)
        if self.opts.output_tab:
            shutil.copyfile(self.opts.output_tab,os.path.join(testdir,self.test1Output))
        if self.opts.make_HTML:
            shutil.copyfile(self.opts.output_html,os.path.join(testdir,self.test1HTML))
        if self.opts.output_dir:
            shutil.copyfile(self.tlog,os.path.join(testdir,'test1_out.log'))
        outpif = '%s.py' % self.toolname # new name
        outpiname = os.path.join(tdir,outpif) # path for the tool tarball
        pyin = os.path.basename(self.pyfile) # our name - we rewrite ourselves (TM)
        notes = ['# %s - a self annotated version of %s generated by running %s\n' % (outpiname,pyin,pyin),]
        notes.append('# to make a new Galaxy tool called %s\n' % self.toolname)
        notes.append('# User %s at %s\n' % (self.opts.user_email,timenow()))
        with open(self.pyfile,'r') as pyf:
            notes += pyf.readlines() # our code becomes new tool wrapper (!) - first Galaxy worm
        with open(outpiname,'w') as outpi:
            outpi.write(''.join(notes))
            outpi.write('\n')
        # NOTE(review): self.sfile comes from tempfile.mkstemp, which returns an
        # absolute path, so this join presumably yields self.sfile itself and
        # the copy below never happens - confirm whether a copy is wanted.
        stname = os.path.join(tdir,self.sfile)
        if not os.path.exists(stname):
            shutil.copyfile(self.sfile, stname)
        xtname = os.path.join(tdir,self.xmlfile)
        if not os.path.exists(xtname):
            shutil.copyfile(self.xmlfile,xtname)
        tarpath = "%s.tar.gz" % self.toolname
        tar = tarfile.open(tarpath, "w:gz")
        try:
            tar.add(tdir,arcname='%s' % self.toolname)
        finally:
            tar.close() # close even if adding fails
        shutil.copyfile(tarpath,self.opts.new_tool)
        shutil.rmtree(tdir)
        ## TODO: replace with optional direct upload to local toolshed?
        return retval


    def compressPDF(self,inpdf=None,thumbformat='png'):
        """Compress a pdf in place with ghostscript and write a thumbnail beside it.

        inpdf must be an existing file - need absolute path to pdf.
        thumbformat is the file extension used for the thumbnail image.
        note that GS gets confoozled if no $TMP or $TEMP
        so we set $TEMP to a 'tmp' directory under opts.output_dir when neither
        names an existing path.

        Each step logs to a text file in opts.output_dir which is deleted when
        the step succeeds. Returns 0 when both steps succeed, otherwise the
        first non-zero exit status.
        Raises AssertionError when inpdf is not an existing file.
        """
        assert os.path.isfile(inpdf), "## Input %s supplied to %s compressPDF not found" % (inpdf,self.myname)
        outdir = self.opts.output_dir
        hlog = os.path.join(outdir,"compress_%s.txt" % os.path.basename(inpdf))
        our_env = os.environ.copy()
        sto = open(hlog,'a')
        try:
            our_tmp = our_env.get('TMP',None) or our_env.get('TEMP',None)
            if not (our_tmp and os.path.exists(our_tmp)):
                newtmp = os.path.join(outdir,'tmp')
                try:
                    os.mkdir(newtmp)
                except OSError:
                    sto.write('## WARNING - cannot make %s - it may exist or permissions need fixing\n' % newtmp)
                our_env['TEMP'] = newtmp
                if not self.temp_warned:
                    sto.write('## WARNING - no $TMP or $TEMP!!! Please fix - using %s temporarily\n' % newtmp)
                    self.temp_warned = True
            outpdf = '%s_compressed' % inpdf
            cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dUseCIEColor", "-dBATCH","-dPDFSETTINGS=/printer", "-sOutputFile=%s" % outpdf,inpdf]
            retval1 = subprocess.Popen(cl,stdout=sto,stderr=sto,cwd=outdir,env=our_env).wait()
        finally:
            sto.close() # never leave the log handle open, even on error
        if retval1 == 0:
            # swap the compressed file in for the original and drop the log
            os.unlink(inpdf)
            shutil.move(outpdf,inpdf)
            os.unlink(hlog)
        hlog = os.path.join(outdir,"thumbnail_%s.txt" % os.path.basename(inpdf))
        outpng = '%s.%s' % (os.path.splitext(inpdf)[0],thumbformat)
        if self.useGM:
            cl2 = ['gm', 'convert', inpdf, outpng]
        else: # assume imagemagick
            cl2 = ['convert', inpdf, outpng]
        sto = open(hlog,'w')
        try:
            retval2 = subprocess.Popen(cl2,stdout=sto,stderr=sto,cwd=outdir,env=our_env).wait()
        finally:
            sto.close()
        if retval2 == 0:
            os.unlink(hlog)
        return retval1 or retval2


    def getfSize(self,fpath,outpath):
        """
        format a nice file size string

        fpath   - file name, relative to outpath
        outpath - directory the file lives in

        Returns '' when the path is not a regular file, otherwise the size in
        B, KB (>= 1024 bytes) or MB (>= 1024 KB) with one decimal place for KB/MB.
        """
        fp = os.path.join(outpath,fpath)
        if not os.path.isfile(fp):
            return ''
        n = os.path.getsize(fp)
        # >= so that exactly 1024 bytes reads as 1.0 KB rather than 1024 B
        if n >= 2**20:
            return '%1.1f MB' % (n/float(2**20))
        if n >= 2**10:
            return '%1.1f KB' % (n/float(2**10))
        return '%d B' % n

    def makeHtml(self):
        """ Create an HTML file content to list all the artifacts found in the output_dir

        Layout of the page written to self.opts.output_html:
          - one section per *.log file found in the output directory, with the tool
            runner log (self.tlog) always last. Each section shows thumbnails of
            the pdfs whose name prefix (text before the first '_') matches the log's
            prefix, followed by the log text. The runner log section collects every
            pdf not already claimed by another log.
          - a table of all output files with their sizes.
        Every pdf is compressed and thumbnailed via compressPDF first. If that fails
        the pdf itself is used as the image source.
        Rplots.pdf is ignored. The list of html lines is also kept in self.html.
        """

        galhtmlprefix = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 
        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 
        <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 
        <meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" /> 
        <title></title> 
        <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> 
        </head> 
        <body> 
        <div class="toolFormBody"> 
        """ 
        galhtmlpostfix = """</div></body></html>\n"""

        outdir = self.opts.output_dir
        tlogpath = os.path.abspath(self.tlog)

        def fullpath(name):
            """absolute path of an output_dir entry - listdir only returns bare names"""
            return os.path.abspath(os.path.join(outdir,name))

        flist = sorted(x for x in os.listdir(outdir) if x != 'Rplots.pdf')
        html = []
        html.append(galhtmlprefix % progname)
        html.append('<div class="infomessage">Galaxy Tool "%s" run at %s</div><br/>' % (self.toolname,timenow()))
        fhtml = []
        if len(flist) > 0:
            # log file names determine sections; names must be resolved against
            # output_dir (not cwd) before comparing with or opening them
            logfiles = [x for x in flist if x.lower().endswith('.log') and fullpath(x) != tlogpath]
            logfiles.append(tlogpath) # make it the last one
            pdflist = []
            for rownum,fname in enumerate(flist):
                dname,e = os.path.splitext(fname)
                if e.lower() == '.pdf' : # compress and make a thumbnail
                    thumb = '%s.%s' % (dname,self.thumbformat)
                    pdff = fullpath(fname) # compressPDF needs an absolute path
                    retval = self.compressPDF(inpdf=pdff,thumbformat=self.thumbformat)
                    if retval == 0:
                        pdflist.append((fname,thumb))
                    else:
                        pdflist.append((fname,fname))
                # measured after any compression so the size matches the linked file
                sfsize = self.getfSize(fname,outdir)
                if (rownum+1) % 2 == 0:
                    fhtml.append('<tr class="odd_row"><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
                else:
                    fhtml.append('<tr><td><a href="%s">%s</a></td><td>%s</td></tr>' % (fname,fname,sfsize))
            for logfname in logfiles: # expect at least tlog - if more
                logpath = fullpath(logfname)
                if logpath == tlogpath: # the runner log takes whatever pdfs are left
                    sectionname = 'All tool run'
                    if (len(logfiles) > 1):
                        sectionname = 'Other'
                    ourpdfs = pdflist
                else:
                    realname = os.path.basename(logfname)
                    sectionname = os.path.splitext(realname)[0].split('_')[0] # break in case _ added to log
                    ourpdfs = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] == sectionname]
                    pdflist = [x for x in pdflist if os.path.basename(x[0]).split('_')[0] != sectionname] # remove
                npdf = len(ourpdfs)

                if npdf > 0:
                    # smallest square grid that holds all the thumbnails
                    nacross = math.sqrt(npdf) ## int(round(math.log(npdf,2)))
                    if int(nacross)**2 != npdf:
                        nacross += 1
                    nacross = int(nacross)
                    width = min(400,1200//nacross)
                    html.append('<div class="toolFormTitle">%s images and outputs</div>' % sectionname)
                    html.append('(Click on a thumbnail image to download the corresponding original PDF image)<br/>')
                    ntogo = nacross # counter for table row padding with empty cells
                    html.append('<div><table class="simple" cellpadding="2" cellspacing="2">\n<tr>')
                    for i,paths in enumerate(ourpdfs): 
                        fname,thumb = paths
                        s= """<td><a href="%s"><img src="%s" title="Click to download a PDF of %s" hspace="5" width="%d" 
                           alt="Image called %s"/></a></td>\n""" % (fname,thumb,fname,width,fname)
                        if ((i+1) % nacross == 0):
                            s += '</tr>\n'
                            ntogo = 0
                            if i < (npdf - 1): # more to come
                                s += '<tr>'
                                ntogo = nacross
                        else:
                            ntogo -= 1
                        html.append(s)
                    if html[-1].strip().endswith('</tr>'):
                        html.append('</table></div>\n')
                    else:
                        if ntogo > 0: # pad
                            html.append('<td>&nbsp;</td>'*ntogo)
                        html.append('</tr></table></div>\n')
                with open(logpath,'r') as logf:
                    logtext = [x for x in logf if x.strip()] # drop blank lines
                html.append('<div class="toolFormTitle">%s log output</div>' % sectionname)
                if len(logtext) > 1:
                    html.append('\n<pre>\n')
                    html += logtext
                    html.append('\n</pre>\n')
                else:
                    html.append('%s is empty<br/>' % logfname)
        if len(fhtml) > 0:
            fhtml.insert(0,'<div><table class="colored" cellpadding="3" cellspacing="3"><tr><th>Output File Name (click to view)</th><th>Size</th></tr>\n')
            fhtml.append('</table></div><br/>')
            html.append('<div class="toolFormTitle">All output files available for downloading</div>\n')
            html += fhtml # add all non-pdf files to the end of the display
        else:
            html.append('<div class="warningmessagelarge">### Error - %s returned no files - please confirm that parameters are sane</div>' % self.opts.interpreter)
        html.append(galhtmlpostfix)
        with open(self.opts.output_html,'w') as htmlf:
            htmlf.write('\n'.join(html))
            htmlf.write('\n')
        self.html = html



    def run(self):
        """
        Some devteam tools have this defensive stderr read so I'm keeping with the faith
        Feel free to update. 
        """
        if self.opts.envshpath <> 'system':
            shell_source(self.opts.envshpath)
            # this only happens at tool generation - the generated tool relies on the dependencies all being set up
            # at toolshed installation by sourcing local env.sh 
        if self.opts.output_dir:
            ste = open(self.elog,'wb')
            sto = open(self.tlog,'wb')
            s = ' '.join(self.cl)
            sto.write('## Executing Toolfactory generated command line = %s\n' % s)
            sto.flush()
            p = subprocess.Popen(self.cl,shell=False,stdout=sto,stderr=ste,cwd=self.opts.output_dir)
            retval = p.wait()
            sto.close()
            ste.close()
            tmp_stderr = open( self.elog, 'rb' )
            err = ''
            buffsize = 1048576
            try:
                while True:
                    err += tmp_stderr.read( buffsize )
                    if not err or len( stderr ) % buffsize != 0:
                        break
            except OverflowError:
                pass
            tmp_stderr.close()
        else:
            p = subprocess.Popen(self.cl,shell=False)
            retval = p.wait()
        if self.opts.output_dir:
            if retval <> 0 and err: # problem
                print >> sys.stderr,err
        if self.opts.make_HTML:
            self.makeHtml()
        return retval

  

def main():
    """
    This is a Galaxy wrapper. It expects to be called by a special purpose tool.xml as:
    <command interpreter="python">rgBaseScriptWrapper.py --script_path "$scriptPath" --tool_name "foo" --interpreter "Rscript"
    </command>

    Parses the command line, validates the required options, makes sure the output
    directory exists, then either builds a new tool archive (--make_Tool) or runs the
    script. The ScriptRunner's script file (r.sfile) is removed afterwards, even if
    the run raised. Exits non-zero when the run fails, to indicate failure to the job
    runner. Invalid or missing options are reported through the option parser's
    error(), which prints the message to stderr and exits with status 2.
    """
    op = optparse.OptionParser()
    a = op.add_option
    a('--script_path',default=None)
    a('--tool_name',default=None)
    a('--interpreter',default=None)
    a('--output_dir',default='./')
    a('--output_html',default=None)
    a('--input_tab',default=[], action="append") # these are "galaxypath,metadataname" pairs
    a("--input_formats",default="tabular")
    a('--output_tab',default=None)
    a('--output_format',default='tabular')
    a('--user_email',default='Unknown')
    a('--bad_user',default=None)
    a('--make_Tool',default=None)
    a('--make_HTML',default=None)
    a('--help_text',default=None)
    a('--tool_desc',default=None)
    a('--new_tool',default=None)
    a('--tool_version',default=None)
    a('--include_dependencies',default=None)
    a('--citations',default=None)
    a('--additional_parameters', dest='additional_parameters', action='append', default=[])
    a('--edit_additional_parameters', action="store_true", default=False)
    a('--envshpath',default="system")
    opts, args = op.parse_args()
    # explicit checks rather than assert so they still run under python -O
    if opts.bad_user:
        op.error('UNAUTHORISED: %s is NOT authorized to use this tool until Galaxy admin adds %s to admin_users in universe_wsgi.ini' % (opts.bad_user,opts.bad_user))
    if not opts.tool_name:
        op.error('## Tool Factory expects a tool name - eg --tool_name=DESeq')
    if not opts.interpreter:
        op.error('## Tool Factory wrapper expects an interpreter - eg --interpreter=Rscript')
    if not (opts.script_path and os.path.isfile(opts.script_path)):
        op.error('## Tool Factory wrapper expects a script path - eg --script_path=foo.R')
    if opts.output_dir:
        try:
            os.makedirs(opts.output_dir)
        except OSError:
            # an existing directory is the expected case - anything else is a real problem
            if not os.path.isdir(opts.output_dir):
                raise
    opts.input_tab = [x.replace('"','').replace("'",'') for x in opts.input_tab]
    # remove quotes we need to deal with spaces in CL params
    opts.additional_parameters = [x.replace('"','') for x in opts.additional_parameters]
    r = ScriptRunner(opts)
    try:
        if opts.make_Tool:
            retcode = r.makeTooltar()
        else:
            retcode = r.run()
    finally:
        if os.path.exists(r.sfile):
            os.unlink(r.sfile)
    if retcode:
        sys.exit(retcode) # indicate failure to job runner


# only run the wrapper when executed as a script, not when imported as a module
if __name__ == "__main__":
    main()