# HG changeset patch
# User galaxyp
# Date 1371740867 14400
# Node ID e9981e6af6668304ce93b6f2accd00dec872d9a2
Improved some datatype handling
diff -r 000000000000 -r e9981e6af666 LICENSE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,11 @@
+--2012-09-19 08:46:18-- http://www.apache.org/licenses/LICENSE-2.0.txt
+Resolving www.apache.org... 140.211.11.131, 192.87.106.229, 2001:610:1:80bc:192:87:106:229
+Connecting to www.apache.org|140.211.11.131|:80... connected.
+HTTP request sent, awaiting response... 200 OK
+Length: 11358 (11K) [text/plain]
+Saving to: “LICENSE-2.0.txt”
+
+ 0K .......... . 100% 200K=0.06s
+
+2012-09-19 08:46:18 (200 KB/s) - “LICENSE-2.0.txt” saved [11358/11358]
+
diff -r 000000000000 -r e9981e6af666 README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,23 @@
+Tool wrapper for the commercial proteomics application Scaffold.
+# Obtaining Tools
+
+Repositories for all Galaxy-P tools can be found at
+https://bitbucket.org/galaxyp/.
+
+# Contact
+
+Please send suggestions for improvements and bug reports to
+jmchilton@gmail.com.
+
+# License
+
+All Galaxy-P tools are licensed under the Apache License Version 2.0
+unless otherwise documented.
+
+# Tool Versioning
+
+Galaxy-P tools will have versions of the form X.Y.Z. Versions
+differing only after the second decimal should be completely
+compatible with each other. Breaking changes should result in an
+increment of the number before and/or after the first decimal. All
+tools of version less than 1.0.0 should be considered beta.
diff -r 000000000000 -r e9981e6af666 README_GALAXYP.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README_GALAXYP.md Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,22 @@
+# Obtaining Tools
+
+Repositories for all Galaxy-P tools can be found at
+https://bitbucket.org/galaxyp/.
+
+# Contact
+
+Please send suggestions for improvements and bug reports to
+jmchilton@gmail.com.
+
+# License
+
+All Galaxy-P tools are licensed under the Apache License Version 2.0
+unless otherwise documented.
+
+# Tool Versioning
+
+Galaxy-P tools will have versions of the form X.Y.Z. Versions
+differing only after the second decimal should be completely
+compatible with each other. Breaking changes should result in an
+increment of the number before and/or after the first decimal. All
+tools of version less than 1.0.0 should be considered beta.
diff -r 000000000000 -r e9981e6af666 README_REPO.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README_REPO.md Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,1 @@
+Tool wrapper for the commercial proteomics application Scaffold.
diff -r 000000000000 -r e9981e6af666 datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r e9981e6af666 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r e9981e6af666 scaffold.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scaffold.py Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,8 @@
+from galaxy.datatypes.binary import Binary
+
+
+class Sf3(Binary):
+    """Class describing a Scaffold SF3 file."""
+ file_ext = "sf3"
+
+Binary.register_unsniffable_binary_ext('sf3')
diff -r 000000000000 -r e9981e6af666 scaffold.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scaffold.xml Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,139 @@
+
+
+ Visualize and Validate Complex MS/MS Proteomics Experiments
+
+
+ # Simple format group:group_name followed by pairs of name:name and path:path lines
+#if $sample_mode.mode == "full"
+#set $samples = $sample_mode.samples
+#for $sample in $samples:
+#if $sample.category.specify
+#set $category = $sample.category.name
+#else
+#set $category = $sample.sample_name
+#end if
+sample:$sample.sample_name
+mudpit:$sample.mudpit
+category:$category
+#for $sample_input in $sample.sample_inputs:
+name:${sample_input.display_name}
+path:${sample_input}
+ext:${sample_input.ext}
+#end for
+#end for
+#elif $sample_mode.mode == "sample_per_file":
+#for $sample_input in $sample_mode.sample_inputs:
+sample:${sample_input.display_name}
+mudpit:false
+category:${sample_input.display_name}
+name:${sample_input.display_name}
+path:${sample_input}
+ext:${sample_input.ext}
+#end for
+#end if
+
+
+
+ scaffold_wrapper.py run \
+ --samples $sample_config \
+ --database $database \
+ --database_name '$database.display_name'\
+ --output $output \
+ --database_type $database_type \
+ --database_decoy_regex '$database_decoy_regex' \
+ #if $thresholds.specify
+ --protein_probability '$thresholds.protein_probability' \
+ --peptide_probability '$thresholds.peptide_probability' \
+ #end if
+ #if $advanced.specify
+ #if $advanced.output_driver
+            --output_driver $output_driver \
+ #end if
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (advanced['specify'] and advanced["output_driver"])
+
+
+
+
+ scaffold
+
+
+**What it does**
+
+Merges multiple protein identification search results together into a single SF3 file for viewing. A free viewer for Scaffold SF3 files can be obtained from Proteome software at http://www.proteomesoftware.com/Scaffold/Scaffold_viewer.htm.
+
+------
+
+
+**Citation**
+
+For the underlying tool, please cite `TODO`
+
+If you use this tool in Galaxy, please cite Chilton J, et al. https://bitbucket.org/galaxyp/galaxyp-toolshed-scaffold
+
+
\ No newline at end of file
diff -r 000000000000 -r e9981e6af666 scaffold_export.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scaffold_export.xml Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,75 @@
+
+
+ Export summary from Scaffold SF3 file.
+
+
+ macros.xml
+
+
+ scaffold_wrapper.py export \
+ --sf3 $sf3_input \
+ --output $output \
+ --export_type $export.export_type \
+ ## Begin Threshold Parameters
+ #set $threshold_type = $threshold.type
+ #set $threshold_options = $threshold
+ #if $threshold_type != "none"
+ --protein_probability=$threshold_options.protein_probability \
+ --peptide_probability=$threshold_options.peptide_probability \
+ --minimum_peptide_count=$threshold_options.minimum_peptide_count \
+ #if $threshold_type != "simple"
+ $threshold_options.ignore_charge_1 \
+ $threshold_options.ignore_charge_2 \
+ $threshold_options.ignore_charge_3 \
+ $threshold_options.ignore_charge_4 \
+ --minimum_ntt=$threshold_options.minimum_ntt \
+ --minimum_peptide_length=$threshold_options.minimum_peptide_length \
+ #end if
+ #end if
+ ## End Threshold Parameters
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ scaffold
+
+
+**What it does**
+
+Export data out of Scaffold's binary data format (sf3) into tabular reports or XML.
+
+------
+
+
+**Citation**
+
+For the underlying tool, please cite `TODO`
+
+If you use this tool in Galaxy, please cite Chilton J, et al. https://bitbucket.org/galaxyp/galaxyp-toolshed-scaffold
+
+
\ No newline at end of file
diff -r 000000000000 -r e9981e6af666 scaffold_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scaffold_wrapper.py Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,318 @@
+#!/usr/bin/env python
+import optparse
+import os
+import shutil
+import sys
+import tempfile
+import subprocess
+import logging
+from string import Template
+from xml.sax.saxutils import escape
+
+log = logging.getLogger(__name__)
+
+DEBUG = True
+
+working_directory = os.getcwd()
+tmp_stderr_name = tempfile.NamedTemporaryFile(dir=working_directory, suffix='.stderr').name
+tmp_stdout_name = tempfile.NamedTemporaryFile(dir=working_directory, suffix='.stdout').name
+
+
+def stop_err(msg):
+ sys.stderr.write("%s\n" % msg)
+ sys.exit()
+
+
+def read_stderr():
+ stderr = ''
+ if(os.path.exists(tmp_stderr_name)):
+ with open(tmp_stderr_name, 'rb') as tmp_stderr:
+ buffsize = 1048576
+ try:
+ while True:
+ stderr += tmp_stderr.read(buffsize)
+ if not stderr or len(stderr) % buffsize != 0:
+ break
+ except OverflowError:
+ pass
+ return stderr
+
+
+def execute(command, stdin=None):
+ try:
+ with open(tmp_stderr_name, 'wb') as tmp_stderr:
+ with open(tmp_stdout_name, 'wb') as tmp_stdout:
+ proc = subprocess.Popen(args=command, shell=True, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno(), stdin=stdin, env=os.environ)
+ returncode = proc.wait()
+ if returncode != 0:
+ raise Exception("Program returned with non-zero exit code %d. stderr: %s" % (returncode, read_stderr()))
+ finally:
+ print open(tmp_stderr_name, "r").read(64000)
+ print open(tmp_stdout_name, "r").read(64000)
+
+
+def delete_file(path):
+ if os.path.exists(path):
+ try:
+ os.remove(path)
+ except:
+ pass
+
+
+def delete_directory(directory):
+ if os.path.exists(directory):
+ try:
+ shutil.rmtree(directory)
+ except:
+ pass
+
+
+def symlink(source, link_name):
+ import platform
+ if platform.system() == 'Windows':
+ try:
+ import win32file
+            win32file.CreateSymbolicLink(link_name, source, 1)
+ except:
+ shutil.copy(source, link_name)
+ else:
+ os.symlink(source, link_name)
+
+
+def copy_to_working_directory(data_file, relative_path):
+ if os.path.abspath(data_file) != os.path.abspath(relative_path):
+ shutil.copy(data_file, relative_path)
+ return relative_path
+
+
+def __main__():
+ run_script()
+
+
+# Extra database attributes: name, databaseAccessionRegEx, databaseDescriptionRegEx, decoyProteinRegEx
+# Extra export types: protxml, spectrum-report, statistics, peptide-report, protein-report, experiment-report
+RUN_TEMPLATE = """
+
+
+$samples
+$display_thresholds
+
+
+
+"""
+
+EXPORT_TEMPLATE = """
+
+$display_thresholds
+
+
+
+"""
+
+def parse_groups(inputs_file, group_parts=["group"], input_parts=["name", "path"]):
+ inputs_lines = [line.strip() for line in open(inputs_file, "r").readlines()]
+ inputs_lines = [line for line in inputs_lines if line and not line.startswith("#")]
+ cur_group = None
+ i = 0
+ group_prefixes = ["%s:" % group_part for group_part in group_parts]
+ input_prefixes = ["%s:" % input_part for input_part in input_parts]
+ groups = {}
+ while i < len(inputs_lines):
+ line = inputs_lines[i]
+ if line.startswith(group_prefixes[0]):
+ # Start new group
+ cur_group = line[len(group_prefixes[0]):]
+ group_data = {}
+ for j, group_prefix in enumerate(group_prefixes):
+ group_line = inputs_lines[i + j]
+ group_data[group_parts[j]] = group_line[len(group_prefix):]
+ i += len(group_prefixes)
+ elif line.startswith(input_prefixes[0]):
+ input = []
+ for j, input_prefix in enumerate(input_prefixes):
+ part_line = inputs_lines[i + j]
+ part = part_line[len(input_prefixes[j]):]
+ input.append(part)
+ if cur_group not in groups:
+ groups[cur_group] = {"group_data": group_data, "inputs": []}
+ groups[cur_group]["inputs"].append(input)
+ i += len(input_prefixes)
+ else:
+ # Skip empty line
+ i += 1
+ return groups
+
+
+def build_samples(samples_file):
+ group_data = parse_groups(samples_file, group_parts=["sample", "mudpit", "category"], input_parts=["name", "path", "ext"])
+ samples_description = ""
+ for sample_name, sample_data in group_data.iteritems():
+ files = sample_data["inputs"]
+ mudpit = sample_data["group_data"]["mudpit"]
+ category = sample_data["group_data"]["category"]
+ samples_description += """\n""" % (sample_name, mudpit, category)
+ for (name, path, ext) in files:
+ name = os.path.basename(name)
+ if not name.lower().endswith(ext.lower()):
+ name = "%s.%s" % (name, ext)
+ symlink(path, name)
+ samples_description += "%s\n" % os.path.abspath(name)
+ samples_description += """\n"""
+ return samples_description
+
+
+def run_script():
+ action = sys.argv[1]
+ if action == "run":
+ proc = scaffold_run
+ elif action == "export":
+ proc = scaffold_export
+ proc()
+
+
+def scaffold_export():
+ parser = optparse.OptionParser()
+ parser.add_option("--sf3")
+ parser.add_option("--output")
+ parser.add_option("--export_type")
+ populate_threshold_options(parser)
+ (options, args) = parser.parse_args()
+
+ template_parameters = {}
+
+ template_parameters["sf3_path"] = options.sf3
+ template_parameters["export_options"] = """ type="%s" """ % options.export_type
+ template_parameters["display_thresholds"] = build_display_thresholds(options)
+
+ execute_scaffold(options, EXPORT_TEMPLATE, template_parameters)
+
+
+def build_display_thresholds(options):
+ attributes = ['id="thresh"']
+ if options.protein_probability is not None:
+ attributes.append('proteinProbability="%s"' % options.protein_probability)
+ if options.peptide_probability is not None:
+ attributes.append('peptideProbability="%s"' % options.peptide_probability)
+ if options.minimum_peptide_count is not None:
+ attributes.append('minimumPeptideCount="%s"' % options.minimum_peptide_count)
+ if options.minimum_peptide_length is not None:
+ attributes.append('minimumPeptideLength="%s"' % options.minimum_peptide_length)
+ if options.minimum_ntt is not None:
+ attributes.append('minimumNTT="%s"' % options.minimum_ntt)
+ attributes.append('useCharge="%s"' % build_use_charge_option(options))
+ tag_open = ""
+ tag_body = "".join([f(options) for f in [tandem_opts, omssa_opts]])
+ tag_close = ""
+ return tag_open + tag_body + tag_close
+
+
+def tandem_opts(options):
+ element = ""
+ tandem_score = options.tandem_score
+ if tandem_score:
+ element = '' % ((tandem_score,) * 4)
+ return element
+
+
+def omssa_opts(options):
+ return ""
+
+
+def build_use_charge_option(options):
+ use_charge_array = []
+ for i in ["1", "2", "3", "4"]:
+ use_charge_i = getattr(options, "use_charge_%s" % i, True)
+ use_charge_array.append("true" if use_charge_i else "false")
+ return ",".join(use_charge_array)
+
+
+def populate_threshold_options(option_parser):
+ option_parser.add_option("--protein_probability", default=None)
+ option_parser.add_option("--peptide_probability", default=None)
+ option_parser.add_option("--minimum_peptide_count", default=None)
+ option_parser.add_option("--ignore_charge_1", action="store_false", dest="use_charge_1", default=True)
+ option_parser.add_option("--ignore_charge_2", action="store_false", dest="use_charge_2", default=True)
+ option_parser.add_option("--ignore_charge_3", action="store_false", dest="use_charge_3", default=True)
+ option_parser.add_option("--ignore_charge_4", action="store_false", dest="use_charge_4", default=True)
+ option_parser.add_option("--minimum_peptide_length", default=None)
+ option_parser.add_option("--minimum_ntt", default=None)
+ option_parser.add_option("--tandem_score", default=None)
+ option_parser.add_option("--omssa_peptide_probability", default=None)
+ option_parser.add_option("--omssa_log_expect_score", default=None)
+
+
+def database_rules(database_type):
+ rules_dict = {
+ "ESTNR": (">(gi\\|[0-9]*)", ">[^ ]* (.*)"),
+ "IPI": (">IPI:([^\\| .]*)", ">[^ ]* Tax_Id=[0-9]* (.*)"),
+ "SWISSPROT": (">([^ ]*)", ">[^ ]* \\([^ ]*\\) (.*)"),
+ "UNIPROT": (">[^ ]*\\|([^ ]*)", ">[^ ]*\\|[^ ]* (.*)"),
+ "UNIREF": (">UniRef100_([^ ]*)", ">[^ ]* (.*)"),
+ "ENSEMBL": (">(ENS[^ ]*)", ">[^ ]* (.*)"),
+ "MSDB": (">([^ ]*)", ">[^ ]* (.*)"),
+ "GENERIC": (">([^ ]*)", ">[^ ]* (.*)"),
+ }
+ database_type = database_type if database_type in rules_dict else "GENERIC"
+ return rules_dict[database_type]
+
+
+def scaffold_run():
+ parser = optparse.OptionParser()
+ parser.add_option("--samples")
+ parser.add_option("--database")
+ parser.add_option("--database_name")
+ parser.add_option("--database_type")
+ parser.add_option("--database_decoy_regex")
+ parser.add_option("--output")
+ parser.add_option("--output_driver")
+ populate_threshold_options(parser)
+ (options, args) = parser.parse_args()
+
+ template_parameters = {}
+
+ # Read samples from config file and convert to XML
+ template_parameters["samples"] = build_samples(options.samples)
+ template_parameters["display_thresholds"] = build_display_thresholds(options)
+
+ # Setup database parameters
+ database_path = options.database
+ database_name = options.database_name
+ database_type = options.database_type
+ database_decoy_regex = options.database_decoy_regex
+
+ (accession_regex, description_regex) = database_rules(database_type)
+
+ template_parameters["database_path"] = database_path
+ template_parameters["database_name"] = database_name
+ template_parameters["database_accession_regex"] = escape(accession_regex)
+ template_parameters["database_description_regex"] = escape(description_regex)
+ template_parameters["database_decoy_regex"] = escape(database_decoy_regex)
+
+ execute_scaffold(options, RUN_TEMPLATE, template_parameters)
+
+ if options.output_driver:
+ shutil.copy("driver.xml", options.output_driver)
+
+
+def execute_scaffold(options, template, template_parameters):
+ # Setup output parameter
+ output_path = options.output
+ template_parameters["output_path"] = output_path
+
+ # Prepare and create driver file
+ driver_contents = Template(template).substitute(template_parameters)
+ print driver_contents
+ driver_path = os.path.abspath("driver.xml")
+ open(driver_path, "w").write(driver_contents)
+
+ # Run Scaffold
+ execute("ScaffoldBatch3 '%s'" % driver_path)
+
+if __name__ == '__main__':
+ __main__()
diff -r 000000000000 -r e9981e6af666 update.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/update.sh Thu Jun 20 11:07:47 2013 -0400
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+LICENSE_FILE=LICENSE
+# Ensure repository contains license file.
+if [ ! -e "$LICENSE_FILE" ];
+then
+ wget http://www.apache.org/licenses/LICENSE-2.0.txt -O "$LICENSE_FILE"
+fi
+
+# Run repository specific update actions.
+if [ -f update_repo.sh ];
+then
+ ./update_repo.sh
+fi
+
+wget https://raw.github.com/gist/3749747/README_GALAXYP.md -O README_GALAXYP.md
+
+# Create repository README
+if [ ! -e README_REPO.md ];
+then
+ echo "TODO: Document this tool repository." > README_REPO.md
+fi
+cat README_REPO.md README_GALAXYP.md > README.md
+
+
+# If version file exists, update all tools to this version
+VERSION_FILE=version
+if [ -e "$VERSION_FILE" ];
+then
+ VERSION=`cat $VERSION_FILE`
+
+ # Replace tool version in each tool XML file `
+ find -iname "*xml" -exec sed -i'' -e '0,/version="\(.\+\)"/s/version="\(.\+\)"/version="'$VERSION'"/1g' {} \;
+
+fi