changeset 0:e9981e6af666 draft

Improved some datatype handling
author galaxyp
date Thu, 20 Jun 2013 11:07:47 -0400
parents
children 0d0cbb69a03f
files LICENSE README.md README_GALAXYP.md README_REPO.md datatypes_conf.xml macros.xml scaffold.py scaffold.xml scaffold_export.xml scaffold_wrapper.py update.sh
diffstat 11 files changed, 671 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE	Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,11 @@
+--2012-09-19 08:46:18--  http://www.apache.org/licenses/LICENSE-2.0.txt
+Resolving www.apache.org... 140.211.11.131, 192.87.106.229, 2001:610:1:80bc:192:87:106:229
+Connecting to www.apache.org|140.211.11.131|:80... connected.
+HTTP request sent, awaiting response... 200 OK
+Length: 11358 (11K) [text/plain]
+Saving to: “LICENSE-2.0.txt”
+
+     0K .......... .                                          100%  200K=0.06s
+
+2012-09-19 08:46:18 (200 KB/s) - “LICENSE-2.0.txt” saved [11358/11358]
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,23 @@
+Tool wrapper for the commercial proteomics application Scaffold.
+# Obtaining Tools
+
+Repositories for all Galaxy-P tools can be found at
+https://bitbucket.org/galaxyp/.
+
+# Contact
+
+Please send suggestions for improvements and bug reports to
+jmchilton@gmail.com.
+
+# License
+
+All Galaxy-P tools are licensed under the Apache License Version 2.0
+unless otherwise documented.
+
+# Tool Versioning
+
+Galaxy-P tools will have versions of the form X.Y.Z. Versions
+differing only after the second decimal should be completely
+compatible with each other. Breaking changes should result in an
+increment of the number before and/or after the first decimal. All
+tools of version less than 1.0.0 should be considered beta.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README_GALAXYP.md	Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,22 @@
+# Obtaining Tools
+
+Repositories for all Galaxy-P tools can be found at
+https://bitbucket.org/galaxyp/.
+
+# Contact
+
+Please send suggestions for improvements and bug reports to
+jmchilton@gmail.com.
+
+# License
+
+All Galaxy-P tools are licensed under the Apache License Version 2.0
+unless otherwise documented.
+
+# Tool Versioning
+
+Galaxy-P tools will have versions of the form X.Y.Z. Versions
+differing only after the second decimal should be completely
+compatible with each other. Breaking changes should result in an
+increment of the number before and/or after the first decimal. All
+tools of version less than 1.0.0 should be considered beta.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README_REPO.md	Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,1 @@
+Tool wrapper for the commercial proteomics application Scaffold.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml	Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<datatypes>
+  <datatype_files>
+    <datatype_file name="scaffold.py"/>
+  </datatype_files>
+  <registration>
+    <datatype extension="sf3" type="galaxy.datatypes.scaffold:Sf3" display_in_upload="true" />
+  </registration>
+</datatypes>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,30 @@
+<macros>
+  <macro name="threshold">
+    <conditional name="threshold">
+      <param type="select" name="type" label="Specify Threshold">
+        <option value="none">None (just use defaults)</option>
+        <option value="simple">With Simple Options</option>
+        <option value="advanced">With Advanced Options</option>
+      </param>
+      <when value="none">
+      </when>
+      <when value="simple">
+        <expand macro="simple_options" />
+      </when>
+      <when value="advanced">
+        <expand macro="simple_options" />
+        <param type="boolean" truevalue="" falsevalue="--ignore_charge_1" label="Use Charge +1" name="ignore_charge_1" checked="true" />
+        <param type="boolean" truevalue="" falsevalue="--ignore_charge_2" label="Use Charge +2" name="ignore_charge_2" checked="true" />
+        <param type="boolean" truevalue="" falsevalue="--ignore_charge_3" label="Use Charge +3" name="ignore_charge_3" checked="true" />
+        <param type="boolean" truevalue="" falsevalue="--ignore_charge_4" label="Use Charge +4 and Higher" name="ignore_charge_4" checked="true" />
+        <param name="minimum_ntt" label="Minimum Number of Enzymatic Termini (NTT)" type="integer" help="" value="0" />
+        <param name="minimum_peptide_length" label="Minimum Peptide Length" type="integer" help="" value="0" />
+      </when>
+    </conditional>
+  </macro>
+  <macro name="simple_options">
+    <param name="protein_probability" label="Protein Probability" type="float" help="" value="0.99" />
+    <param name="minimum_peptide_count" label="Minimum Peptide Count" type="integer" help="" value="2" />
+    <param name="peptide_probability" label="Peptide Probability" type="float" help="" value="0.95" />
+  </macro>
+</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scaffold.py	Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,8 @@
+from galaxy.datatypes.binary import Binary
+
+
+class Sf3(Binary):
+    """Class describing a Scaffold SF3 file."""
+    # File extension associated with this datatype.
+    file_ext = "sf3"
+
+Binary.register_unsniffable_binary_ext('sf3')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scaffold.xml	Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,139 @@
+<tool id="scaffold" name="Scaffold" version="0.1.0">
+  <description>
+    Visualize and Validate Complex MS/MS Proteomics Experiments
+  </description>
+  <configfiles>
+    <configfile name="sample_config"># Simple format group:group_name followed by pairs of name:name and path:path lines
+#if $sample_mode.mode == "full"
+#set $samples = $sample_mode.samples
+#for $sample in $samples:
+#if $sample.category.specify 
+#set $category = $sample.category.name
+#else
+#set $category = $sample.sample_name 
+#end if
+sample:$sample.sample_name
+mudpit:$sample.mudpit
+category:$category
+#for $sample_input in $sample.sample_inputs:
+name:${sample_input.display_name}
+path:${sample_input}
+ext:${sample_input.ext}
+#end for
+#end for
+#elif $sample_mode.mode == "sample_per_file":
+#for $sample_input in $sample_mode.sample_inputs:
+sample:${sample_input.display_name}
+mudpit:false
+category:${sample_input.display_name}
+name:${sample_input.display_name}
+path:${sample_input}
+ext:${sample_input.ext}
+#end for
+#end if
+    </configfile>
+  </configfiles>
+  <command interpreter="python">
+    scaffold_wrapper.py run \
+    --samples $sample_config \
+    --database $database \
+    --database_name '$database.display_name'\
+    --output $output \
+    --database_type $database_type \
+    --database_decoy_regex '$database_decoy_regex' \
+    #if $thresholds.specify
+    --protein_probability '$thresholds.protein_probability' \
+    --peptide_probability '$thresholds.peptide_probability' \
+    #end if
+    #if $advanced.specify
+    #if $advanced.output_driver
+    --output_driver $output_drirver \
+    #end if
+    #end if
+  </command>
+  <inputs>
+    <param format="fasta" name="database" type="data" label="Search Database" />
+    <param type="select" name="database_type" label="Database Type">
+      <option value="GENERIC">Generic</option>
+      <option value="ESTNR">EST/NR (NCBI)</option>
+      <option value="IPI">IPI (EBI)</option>
+      <option value="SWISSPROT">Swiss-Prot (SIB/EBI)</option>
+      <option value="UNIPROT">UniProt/Swiss-Prot (UniProtKB)</option>
+      <option value="UNIREF">UniRef/NREF (UniProt)</option>
+      <option value="ENSEMBL">Ensembl (EMBL/EBI)</option>
+      <option value="MSDB">MSDB (Proteomics Group)</option>
+    </param>
+    <param name="database_decoy_regex" type="text" label="Database Decoy Prefix" help="Regular expression describing decoys in specified FASTA database." value="REV|RRR">
+      <sanitizer>
+        <valid initial="string.printable">
+          <add value="|"/>
+        </valid>
+      </sanitizer>
+    </param>
+    <conditional name="sample_mode">
+      <param name="mode" type="select" label="How should samples be created">
+        <option value="full">Create Samples Manually</option>
+        <option value="sample_per_file">Create Sample per File</option>
+      </param>
+      <when value="full">
+        <repeat name="samples" title="Biological Sample">
+          <param name="sample_name" type="text" label="Sample Name" help="Name for the sample (use only letters and numbers)." />
+          <param format="t.xml,omx" name="sample_inputs" type="data" multiple="true" label="Sample Input Files" />
+          <param name="mudpit" type="boolean" label="Mudpit" help="" truevalue="true" falsevalue="false" />
+          <conditional name="category">
+            <param name="specify" type="boolean" label="Specify Custom Category" help="" truevalue="true" falsevalue="false" />
+            <when value="false" />
+            <when value="true">
+              <param name="name" type="text" label="Category Name" help="" />
+            </when>
+          </conditional>
+        </repeat>        
+      </when>
+      <when value="sample_per_file">
+        <param format="t.xml,omx" name="sample_inputs" type="data" multiple="true" label="Input Files" />
+      </when>
+    </conditional>
+    <conditional name="thresholds">
+      <param name="specify" type="boolean" label="Specify Reporting Thresholds" help="" truevalue="true" falsevalue="false" />
+      <when value="false" />
+      <when value="true">
+        <param name="protein_probability" type="float" label="Minimum Protein Probability" help="Optional value between 0.0 and 1.0." optional="true" />
+        <param name="peptide_probability" type="float" label="Minimum Peptide Probability" help="Optional value between 0.0 and 1.0." optional="true" />
+      </when>
+    </conditional>
+    <conditional name="advanced">
+      <param name="specify" type="boolean" label="Specify Advanced Options" help="" truevalue="true" falsevalue="false" />
+      <when value="false" />
+      <when value="true">
+        <param name="output_driver" type="boolean" label="Include Scaffold Driver File in Output" truevalue="true" falsevalue="false" />
+        <!--
+        <param name="ncbi_annotate" type="boolean" label="" truevalue="true" falsevalue="false" />
+        <param name="go_annotate" type="boolean" label="" truevalue="true" falsevalue="false" />
+      -->
+      </when>
+    </conditional>
+  </inputs>
+  <outputs>
+    <data format="xml" name="output_drirver" label="Scaffold Driver for ${on_string}">
+      <filter>(advanced['specify'] and advanced["output_driver"])</filter>
+    </data>
+    <data format="sf3" name="output" />
+  </outputs>
+  <requirements>
+    <requirement type="package">scaffold</requirement>
+  </requirements>
+  <help>
+**What it does**
+
+Merges multiple protein identification search results together into a single SF3 file for viewing. A free viewer for Scaffold SF3 files can be obtained from Proteome software at http://www.proteomesoftware.com/Scaffold/Scaffold_viewer.htm.
+
+------
+
+
+**Citation**
+
+For the underlying tool, please cite `TODO`
+
+If you use this tool in Galaxy, please cite Chilton J, et al. https://bitbucket.org/galaxyp/galaxyp-toolshed-scaffold
+  </help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scaffold_export.xml	Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,75 @@
+<tool id="scaffold_export" name="Scaffold Export" version="0.1.0">
+  <description>
+    Export summary from Scaffold SF3 file.
+  </description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <command interpreter="python">
+    scaffold_wrapper.py export \
+    --sf3 $sf3_input \
+    --output $output \
+    --export_type $export.export_type \
+    ## Begin Threshold Parameters
+    #set $threshold_type = $threshold.type
+    #set $threshold_options = $threshold
+    #if $threshold_type != "none"
+    --protein_probability=$threshold_options.protein_probability \
+    --peptide_probability=$threshold_options.peptide_probability \
+    --minimum_peptide_count=$threshold_options.minimum_peptide_count \
+    #if $threshold_type != "simple"
+    $threshold_options.ignore_charge_1 \
+    $threshold_options.ignore_charge_2 \
+    $threshold_options.ignore_charge_3 \
+    $threshold_options.ignore_charge_4 \
+    --minimum_ntt=$threshold_options.minimum_ntt \
+    --minimum_peptide_length=$threshold_options.minimum_peptide_length \
+    #end if
+    #end if
+    ## End Threshold Parameters
+  </command>
+  <inputs>
+    <param format="sf3" name="sf3_input" type="data" label="Scaffold Results" />
+    <conditional name="export">
+      <param name="export_type" type="select" label="Export Format">
+        <option value="mzIdentML">MzIdentML</option>
+        <option value="protxml">ProtXML</option>
+        <option value="statistics">Statistics (tabular)</option>
+        <option value="spectrum-report">Spectrum Report (tabular)</option>
+        <option value="peptide-report">Peptide Report (tabular)</option>
+        <option value="protein-report">Protein Report (tabular)</option>
+        <option value="publication-report">Publication Report (tabular)</option>
+        <option value="isoform-report">Isoform Report (tabular)</option>
+        <option value="spectrum-counting-report">Spectrum Counting Report (tabular)</option>
+        <option value="accession-report">Accession Report (tabular)</option>
+        <option value="experiment-report">Experiment Report (tabular)</option>
+      </param>
+    </conditional>
+    <expand macro="threshold" />
+  </inputs>
+  <outputs>
+    <data format="tabular" name="output">
+      <change_format>
+        <when input="export.export_type" value="mzIdentML" format="mzid" />
+        <when input="export.export_type" value="protxml" format="protxml" />
+      </change_format>
+    </data>
+  </outputs>
+  <requirements>
+    <requirement type="package">scaffold</requirement>
+  </requirements>
+  <help>
+**What it does**
+
+Export data out of Scaffold's binary data format (sf3) into tabular reports or XML.
+
+------
+
+
+**Citation**
+
+For the underlying tool, please cite `TODO`
+
+If you use this tool in Galaxy, please cite Chilton J, et al. https://bitbucket.org/galaxyp/galaxyp-toolshed-scaffold
+  </help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scaffold_wrapper.py	Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,318 @@
+#!/usr/bin/env python
+import optparse
+import os
+import shutil
+import sys
+import tempfile
+import subprocess
+import logging
+from string import Template
+from xml.sax.saxutils import escape
+
+log = logging.getLogger(__name__)
+
+# Debug switch; not referenced anywhere else in this script.
+DEBUG = True
+
+# Output of the wrapped command is captured in files inside the directory the
+# script was started from.  Only the generated *names* are kept here (the
+# NamedTemporaryFile objects themselves are not retained); execute() re-opens
+# these paths for writing and read_stderr() reads the stderr one back.
+working_directory = os.getcwd()
+tmp_stderr_name = tempfile.NamedTemporaryFile(dir=working_directory, suffix='.stderr').name
+tmp_stdout_name = tempfile.NamedTemporaryFile(dir=working_directory, suffix='.stdout').name
+
+
+def stop_err(msg):
+    """Write ``msg`` to stderr and terminate with a failing exit status.
+
+    A bare ``sys.exit()`` exits with status 0, which signals success to the
+    calling process even though an error was just reported, so exit with 1.
+    """
+    sys.stderr.write("%s\n" % msg)
+    sys.exit(1)
+
+
+def read_stderr():
+    """Return everything the last command wrote to the captured stderr file.
+
+    Returns an empty string when the capture file does not exist.  The file
+    is read in fixed-size chunks until ``read`` reports end of file; the
+    previous length-modulo heuristic never terminated when the file size was
+    an exact non-zero multiple of the chunk size.
+    """
+    if not os.path.exists(tmp_stderr_name):
+        return ''
+    buffsize = 1048576
+    chunks = []
+    with open(tmp_stderr_name, 'rb') as tmp_stderr:
+        try:
+            while True:
+                chunk = tmp_stderr.read(buffsize)
+                if not chunk:
+                    # Empty read means end of file.
+                    break
+                chunks.append(chunk)
+        except OverflowError:
+            # Best effort, as before: return whatever was read so far.
+            pass
+    return ''.join(chunks)
+
+
+def execute(command, stdin=None):
+    """Run ``command`` through the shell, capturing stdout/stderr in files.
+
+    The command's stderr and stdout are redirected to ``tmp_stderr_name`` and
+    ``tmp_stdout_name``.  Whether or not the command succeeds, up to 64000
+    characters of each capture file are echoed to this process's stdout.
+
+    Raises:
+        Exception: if the command exits with a non-zero status; the message
+            includes the captured stderr.
+    """
+    try:
+        with open(tmp_stderr_name, 'wb') as tmp_stderr:
+            with open(tmp_stdout_name, 'wb') as tmp_stdout:
+                proc = subprocess.Popen(args=command, shell=True, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno(), stdin=stdin, env=os.environ)
+                returncode = proc.wait()
+                if returncode != 0:
+                    raise Exception("Program returned with non-zero exit code %d. stderr: %s" % (returncode, read_stderr()))
+    finally:
+        # Echo the captured output.  Handles are closed explicitly, and a
+        # capture file that was never created is skipped so that a failure
+        # here cannot mask the exception raised above.
+        for captured_name in (tmp_stderr_name, tmp_stdout_name):
+            if os.path.exists(captured_name):
+                with open(captured_name, "r") as captured:
+                    print(captured.read(64000))
+
+
+def delete_file(path):
+    """Remove the file at ``path`` if possible; never raises on failure.
+
+    Removal is attempted directly rather than after an existence check, and
+    only ``OSError`` (missing file, permissions, ...) is suppressed so that
+    unrelated errors such as ``KeyboardInterrupt`` still propagate.
+    """
+    try:
+        os.remove(path)
+    except OSError:
+        # Best-effort cleanup: a file that cannot be removed is left behind.
+        pass
+
+
+def delete_directory(directory):
+    """Recursively remove ``directory`` if it exists; never raises on failure.
+
+    ``shutil.rmtree(..., ignore_errors=True)`` provides the best-effort
+    behaviour without a bare ``except`` that would also swallow
+    ``KeyboardInterrupt`` and ``SystemExit``.
+    """
+    if os.path.exists(directory):
+        shutil.rmtree(directory, ignore_errors=True)
+
+
+def symlink(source, link_name):
+    """Make ``link_name`` refer to ``source``.
+
+    On Windows a symbolic link is attempted through ``win32file``; if that
+    fails for any reason (including the module not being importable) the file
+    is copied instead.  On every other platform ``os.symlink`` is used.
+    """
+    import platform
+    if platform.system() == 'Windows':
+        try:
+            import win32file
+            # NOTE(review): pywin32 appears to document this call as
+            # CreateSymbolicLink(SymlinkFileName, TargetFileName, Flags) --
+            # confirm the argument order and the flag value 1 used here.
+            win32file.CreateSymbolicLink(source, link_name, 1)
+        except:
+            # Deliberately broad: any failure falls back to a plain copy.
+            shutil.copy(source, link_name)
+    else:
+        os.symlink(source, link_name)
+
+
+def copy_to_working_directory(data_file, relative_path):
+    """Copy ``data_file`` to ``relative_path`` unless both name the same path.
+
+    Returns ``relative_path`` in either case.
+    """
+    source_location = os.path.abspath(data_file)
+    target_location = os.path.abspath(relative_path)
+    already_in_place = source_location == target_location
+    if not already_in_place:
+        shutil.copy(data_file, relative_path)
+    return relative_path
+
+
+def __main__():
+    """Script entry point: hand control to the action dispatcher."""
+    run_script()
+
+
+# Driver-file templates.  The $name placeholders are filled in by
+# execute_scaffold() through string.Template substitution.
+#
+# RUN_TEMPLATE: used by the "run" action (scaffold_run).
+# Extra database attributes: name, databaseAccessionRegEx, databaseDescriptionRegEx, decoyProteinRegEx
+# Extra export types: protxml, spectrum-report, statistics, peptide-report, protein-report, experiment-report
+RUN_TEMPLATE = """<Scaffold>
+<Experiment name="Galaxy Scaffold Experiment">
+<FastaDatabase id="database"
+               path="$database_path"
+               name="$database_name"
+               databaseAccessionRegEx="$database_accession_regex"
+               databaseDescriptionRegEx="$database_description_regex"
+               decoyProteinRegEx="$database_decoy_regex"
+               />
+$samples
+$display_thresholds
+<Export type="sf3" path="$output_path" thresholds="thresh" />
+</Experiment>
+</Scaffold>
+"""
+
+# EXPORT_TEMPLATE: used by the "export" action (scaffold_export).
+EXPORT_TEMPLATE = """<Scaffold>
+<Experiment load="$sf3_path">
+$display_thresholds
+<Export $export_options path="$output_path" thresholds="thresh" />
+</Experiment>
+</Scaffold>
+"""
+
+def parse_groups(inputs_file, group_parts=("group",), input_parts=("name", "path")):
+    """Parse a ``key:value`` listing into groups of input records.
+
+    Blank lines and lines starting with ``#`` are ignored.  A line starting
+    with ``<group_parts[0]>:`` opens a group and is followed by one line per
+    remaining group part; a line starting with ``<input_parts[0]>:`` opens an
+    input record and is followed by one line per remaining input part.  Any
+    other line is skipped.
+
+    Args:
+        inputs_file: path of the listing to read.
+        group_parts: ordered keys making up a group header.
+        input_parts: ordered keys making up one input record.
+
+    Returns:
+        dict mapping the value of the first group part to
+        ``{"group_data": {part: value}, "inputs": [[value, ...], ...]}``.
+        Groups without any input record are not included.
+
+    Raises:
+        ValueError: if a record is cut short by the end of the file, or an
+            input record appears before any group header.
+    """
+    # Read everything up front so the handle is closed promptly.
+    with open(inputs_file, "r") as inputs_handle:
+        stripped_lines = [line.strip() for line in inputs_handle]
+    inputs_lines = [line for line in stripped_lines if line and not line.startswith("#")]
+    group_prefixes = ["%s:" % group_part for group_part in group_parts]
+    input_prefixes = ["%s:" % input_part for input_part in input_parts]
+
+    def read_record(start, prefixes):
+        # One value per prefix, taken from consecutive lines; each line is
+        # trimmed by the width of the prefix expected at that position.
+        record_lines = inputs_lines[start:start + len(prefixes)]
+        if len(record_lines) < len(prefixes):
+            raise ValueError("Incomplete record starting at %r in %s" % (inputs_lines[start], inputs_file))
+        return [record_line[len(prefix):] for record_line, prefix in zip(record_lines, prefixes)]
+
+    cur_group = None
+    group_data = None
+    groups = {}
+    i = 0
+    while i < len(inputs_lines):
+        line = inputs_lines[i]
+        if line.startswith(group_prefixes[0]):
+            # Start new group
+            group_values = read_record(i, group_prefixes)
+            cur_group = group_values[0]
+            group_data = dict(zip(group_parts, group_values))
+            i += len(group_prefixes)
+        elif line.startswith(input_prefixes[0]):
+            if group_data is None:
+                raise ValueError("Input record %r appears before any group in %s" % (line, inputs_file))
+            if cur_group not in groups:
+                groups[cur_group] = {"group_data": group_data, "inputs": []}
+            groups[cur_group]["inputs"].append(read_record(i, input_prefixes))
+            i += len(input_prefixes)
+        else:
+            # Skip unrecognized line
+            i += 1
+    return groups
+
+
+def build_samples(samples_file):
+    """Build the ``<BiologicalSample>`` XML for every sample in ``samples_file``.
+
+    Each input file of a sample is linked into the current directory (via
+    ``symlink``) under its display name, with the extension appended when the
+    name does not already end with it, and referenced by absolute path.
+
+    Sample names, categories and paths come from user-editable values, so
+    they are XML-escaped before being placed in the document; previously a
+    ``&``, ``<`` or ``"`` in a name produced a malformed driver file.
+
+    Returns:
+        str: the concatenated XML fragment (empty if there are no samples).
+    """
+    def xml_attribute(value):
+        # escape() leaves double quotes alone; attribute values need them encoded.
+        return escape(value, {'"': "&quot;"})
+
+    group_data = parse_groups(samples_file, group_parts=["sample", "mudpit", "category"], input_parts=["name", "path", "ext"])
+    description_parts = []
+    for sample_name, sample_data in group_data.items():
+        files = sample_data["inputs"]
+        mudpit = sample_data["group_data"]["mudpit"]
+        category = sample_data["group_data"]["category"]
+        description_parts.append("""<BiologicalSample database="database" name="%s" mudpit="%s" category="%s">\n""" % (xml_attribute(sample_name), xml_attribute(mudpit), xml_attribute(category)))
+        for (name, path, ext) in files:
+            name = os.path.basename(name)
+            if not name.lower().endswith(ext.lower()):
+                name = "%s.%s" % (name, ext)
+            symlink(path, name)
+            description_parts.append("<InputFile>%s</InputFile>\n" % escape(os.path.abspath(name)))
+        description_parts.append("""</BiologicalSample>\n""")
+    return "".join(description_parts)
+
+
+def run_script():
+    """Dispatch to the handler named by the first command-line argument.
+
+    Supported actions are ``run`` and ``export``.  A missing or unknown
+    action now produces a usage message on stderr and exit status 1 instead
+    of an ``IndexError`` / ``UnboundLocalError`` traceback.
+    """
+    actions = {
+        "run": scaffold_run,
+        "export": scaffold_export,
+    }
+    if len(sys.argv) < 2 or sys.argv[1] not in actions:
+        sys.stderr.write("Usage: scaffold_wrapper.py <%s> [options]\n" % "|".join(sorted(actions)))
+        sys.exit(1)
+    actions[sys.argv[1]]()
+
+
+def scaffold_export():
+    """Handle the ``export`` action: export a report from an SF3 file.
+
+    Parses ``--sf3``, ``--output``, ``--export_type`` and the threshold
+    options, fills ``EXPORT_TEMPLATE`` and runs Scaffold on the result.
+    """
+    option_parser = optparse.OptionParser()
+    for flag in ("--sf3", "--output", "--export_type"):
+        option_parser.add_option(flag)
+    populate_threshold_options(option_parser)
+    options, _ = option_parser.parse_args()
+
+    template_parameters = {
+        "sf3_path": options.sf3,
+        "export_options": """ type="%s" """ % options.export_type,
+        "display_thresholds": build_display_thresholds(options),
+    }
+
+    execute_scaffold(options, EXPORT_TEMPLATE, template_parameters)
+
+
+def build_display_thresholds(options):
+    """Render the ``<DisplayThresholds id="thresh">`` element for ``options``.
+
+    Threshold attributes whose option is ``None`` are left out; ``useCharge``
+    is always present.  The element body is the concatenation of the
+    per-search-engine fragments from ``tandem_opts`` and ``omssa_opts``.
+    """
+    optional_attributes = (
+        ("proteinProbability", options.protein_probability),
+        ("peptideProbability", options.peptide_probability),
+        ("minimumPeptideCount", options.minimum_peptide_count),
+        ("minimumPeptideLength", options.minimum_peptide_length),
+        ("minimumNTT", options.minimum_ntt),
+    )
+    attributes = ['id="thresh"']
+    for attribute_name, attribute_value in optional_attributes:
+        if attribute_value is not None:
+            attributes.append('%s="%s"' % (attribute_name, attribute_value))
+    attributes.append('useCharge="%s"' % build_use_charge_option(options))
+
+    body = tandem_opts(options) + omssa_opts(options)
+    return "<DisplayThresholds " + " ".join(attributes) + ">" + body + "</DisplayThresholds>"
+
+
+def tandem_opts(options):
+    """Return the ``<TandemThresholds>`` element, or ``""`` if no score is set.
+
+    The single ``options.tandem_score`` value is repeated four times in the
+    ``logExpectScores`` attribute.
+    """
+    score = options.tandem_score
+    if not score:
+        return ""
+    repeated_scores = ",".join(["%s" % score] * 4)
+    return '<TandemThresholds logExpectScores="%s" />' % repeated_scores
+
+
+def omssa_opts(options):
+    """Return the OMSSA threshold fragment; currently always an empty string.
+
+    ``options`` is accepted for symmetry with ``tandem_opts`` but is not read.
+    """
+    return ""
+
+
+def build_use_charge_option(options):
+    """Return the ``useCharge`` value: four comma-separated ``true``/``false`` flags.
+
+    Flag *i* reflects ``options.use_charge_<i>`` for i in 1..4; a missing
+    attribute counts as true.
+    """
+    charge_flags = (getattr(options, "use_charge_%s" % charge, True) for charge in ("1", "2", "3", "4"))
+    return ",".join("true" if flag else "false" for flag in charge_flags)
+
+
+def populate_threshold_options(option_parser):
+    """Register every threshold-related command-line option on ``option_parser``.
+
+    Value options default to ``None``.  Each ``--ignore_charge_N`` flag
+    stores ``False`` into ``use_charge_N``, which otherwise defaults to
+    ``True``.
+    """
+    for value_option in ("protein_probability", "peptide_probability", "minimum_peptide_count"):
+        option_parser.add_option("--%s" % value_option, default=None)
+    for charge in (1, 2, 3, 4):
+        option_parser.add_option("--ignore_charge_%d" % charge, action="store_false", dest="use_charge_%d" % charge, default=True)
+    for value_option in ("minimum_peptide_length", "minimum_ntt", "tandem_score", "omssa_peptide_probability", "omssa_log_expect_score"):
+        option_parser.add_option("--%s" % value_option, default=None)
+
+
+def database_rules(database_type):
+    """Return ``(accession_regex, description_regex)`` for ``database_type``.
+
+    Any type without an entry of its own gets the generic pair.
+    """
+    generic_rules = (">([^ ]*)", ">[^ ]* (.*)")
+    specific_rules = {
+        "ESTNR": (">(gi\\|[0-9]*)", ">[^ ]* (.*)"),
+        "IPI": (">IPI:([^\\| .]*)", ">[^ ]* Tax_Id=[0-9]* (.*)"),
+        "SWISSPROT": (">([^ ]*)", ">[^ ]* \\([^ ]*\\) (.*)"),
+        "UNIPROT": (">[^ ]*\\|([^ ]*)", ">[^ ]*\\|[^ ]* (.*)"),
+        "UNIREF": (">UniRef100_([^ ]*)", ">[^ ]* (.*)"),
+        "ENSEMBL": (">(ENS[^ ]*)", ">[^ ]* (.*)"),
+        "MSDB": generic_rules,
+        "GENERIC": generic_rules,
+    }
+    return specific_rules.get(database_type, generic_rules)
+
+
+def scaffold_run():
+    """Handle the ``run`` action: build an SF3 file from search results.
+
+    Parses the command line, converts the samples listing into XML, fills
+    ``RUN_TEMPLATE`` and runs Scaffold.  When ``--output_driver`` is given
+    the generated ``driver.xml`` is copied to that path afterwards.
+
+    Every value placed inside an XML attribute is escaped, including double
+    quotes.  Previously the database path and the user-visible database name
+    were inserted verbatim, so a name containing ``&``, ``<`` or ``"``
+    produced a malformed driver file.
+    """
+    parser = optparse.OptionParser()
+    parser.add_option("--samples")
+    parser.add_option("--database")
+    parser.add_option("--database_name")
+    parser.add_option("--database_type")
+    parser.add_option("--database_decoy_regex")
+    parser.add_option("--output")
+    parser.add_option("--output_driver")
+    populate_threshold_options(parser)
+    (options, args) = parser.parse_args()
+
+    # escape() leaves double quotes alone; attribute values need them encoded.
+    quote_entities = {'"': "&quot;"}
+
+    def optional_attribute(value):
+        # An option that was not supplied stays None, as it did before.
+        if value is None:
+            return value
+        return escape(value, quote_entities)
+
+    template_parameters = {}
+
+    # Read samples from config file and convert to XML
+    template_parameters["samples"] = build_samples(options.samples)
+    template_parameters["display_thresholds"] = build_display_thresholds(options)
+
+    # Setup database parameters
+    database_path = options.database
+    database_name = options.database_name
+    database_type = options.database_type
+    database_decoy_regex = options.database_decoy_regex
+
+    (accession_regex, description_regex) = database_rules(database_type)
+
+    template_parameters["database_path"] = optional_attribute(database_path)
+    template_parameters["database_name"] = optional_attribute(database_name)
+    template_parameters["database_accession_regex"] = escape(accession_regex, quote_entities)
+    template_parameters["database_description_regex"] = escape(description_regex, quote_entities)
+    template_parameters["database_decoy_regex"] = escape(database_decoy_regex, quote_entities)
+
+    execute_scaffold(options, RUN_TEMPLATE, template_parameters)
+
+    if options.output_driver:
+        shutil.copy("driver.xml", options.output_driver)
+
+
+def execute_scaffold(options, template, template_parameters):
+    """Write the Scaffold driver file and run ``ScaffoldBatch3`` on it.
+
+    Args:
+        options: parsed command-line options; ``options.output`` supplies the
+            ``output_path`` template parameter.
+        template: driver template text with ``$name`` placeholders.
+        template_parameters: dict of placeholder values; ``output_path`` is
+            added to it in place.
+
+    The rendered driver is echoed to stdout and written to ``driver.xml`` in
+    the current directory.  The file is written inside a ``with`` block so it
+    is flushed and closed before Scaffold is started.
+    """
+    # Setup output parameter
+    output_path = options.output
+    template_parameters["output_path"] = output_path
+
+    # Prepare and create driver file
+    driver_contents = Template(template).substitute(template_parameters)
+    print(driver_contents)
+    driver_path = os.path.abspath("driver.xml")
+    with open(driver_path, "w") as driver_file:
+        driver_file.write(driver_contents)
+
+    # Run Scaffold
+    # NOTE(review): the path is only wrapped in single quotes, so a working
+    # directory containing a single quote would break this shell command.
+    execute("ScaffoldBatch3 '%s'" % driver_path)
+
+if __name__ == '__main__':
+    __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/update.sh	Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Refreshes the boilerplate files of this tool repository: license, READMEs
+# and, when a "version" file is present, the version of every tool XML file.
+
+LICENSE_FILE=LICENSE
+# Ensure repository contains license file.
+if [ ! -e "$LICENSE_FILE" ];
+then
+    wget http://www.apache.org/licenses/LICENSE-2.0.txt -O "$LICENSE_FILE"
+fi
+
+# Run repository specific update actions.
+if [ -f update_repo.sh ];
+then
+    ./update_repo.sh
+fi
+
+wget https://raw.github.com/gist/3749747/README_GALAXYP.md -O README_GALAXYP.md
+
+# Create repository README
+if [ ! -e README_REPO.md ];
+then
+    echo "TODO: Document this tool repository." > README_REPO.md
+fi
+cat README_REPO.md README_GALAXYP.md > README.md
+
+
+# If version file exists, update all tools to this version
+VERSION_FILE=version
+if [ -e "$VERSION_FILE" ];
+then
+    VERSION=$(cat "$VERSION_FILE")
+
+    # Replace the version attribute of the first <tool ...> tag in each XML
+    # file.  The match is anchored on "<tool " so that other version
+    # attributes -- notably the <?xml version="1.0"?> declaration of
+    # datatypes_conf.xml -- are left untouched, and [^"]* keeps the match
+    # from running past the attribute's closing quote.
+    find . -iname "*.xml" -exec sed -i'' -e '0,/<tool [^>]*version="/s/\(<tool [^>]*\)version="[^"]*"/\1version="'"$VERSION"'"/' {} \;
+
+fi