changeset 1:790d80981060

Update.
author galaxyp
date Fri, 10 May 2013 18:04:15 -0400
parents 7dcb26ce559c
children ebeaa403d9eb
files proteinpilot.xml proteinpilot_wrapper.py
diffstat 2 files changed, 71 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/proteinpilot.xml	Wed Dec 19 00:22:55 2012 -0500
+++ b/proteinpilot.xml	Fri May 10 18:04:15 2013 -0400
@@ -1,13 +1,19 @@
 <tool id="proteinpilot" version="0.2.1" name="ProteinPilot">
   <description></description>
   <!-- ESCAPE input and database name -->
+  <configfiles>
+    <configfile name="input_config">## Describe input groups
+group:DEFAULT_GROUP
+#for $input in $inputs:
+name:${input.display_name}
+path:${input}
+#end for
+</configfile>
+  </configfiles>  
   <command interpreter="python">proteinpilot_wrapper.py 
-    #for $input in $inputs:
-    --input="${input}"
-    --input_name="${input.name}"
-    #end for
+    --input_config="$input_config"
     --database="${database}"
-    --database_name="${database.name}"
+    --database_name="${database.display_name}"
     --output=$output
     --output_methods=$output_methods
     --sample_type="$sample_type"
@@ -38,6 +44,10 @@
       <option value="iTRAQ 4plex (Protein Labeled)">iTRAQ 4plex (Protein Labeled)</option>
       <option value="iTRAQ 8plex (Peptide Labeled)">iTRAQ 8plex (Peptide Labeled)</option>
       <option value="iTRAQ 8plex (Protein Labeled)">iTRAQ 8plex (Protein Labeled)</option>
+      <option value="TMT6plex ID-only (Peptide Labeled)">TMT6plex ID-only (Peptide Labeled)</option>
+      <option value="TMT6plex ID-only (Protein Labeled)">TMT6plex ID-only (Protein Labeled)</option>
+      <option value="TMT2plex ID-only (Peptide Labeled)">TMT2plex ID-only (Peptide Labeled)</option>
+      <option value="TMT ID-only (Peptide Labeled)">TMT ID-only (Peptide Labeled)</option>
       <option value="mTRAQ (Peptide Labeled - M00, M04)">mTRAQ (Peptide Labeled - M00, M04)</option>
       <option value="mTRAQ (Peptide Labeled - M00, M08)">mTRAQ (Peptide Labeled - M00, M08)</option>
       <option value="mTRAQ (Peptide Labeled - M04, M08)">mTRAQ (Peptide Labeled - M04, M08)</option>
@@ -62,7 +72,7 @@
       <UI_SAMPLE_TYPE>TMT6plex ID-only (Peptide Labeled)</UI_SAMPLE_TYPE>
       <UI_QUANT_TYPE></UI_QUANT_TYPE>
       
-      <UI_SAMPLE_TYPE>TMT6plex ID-only (Protein Labeled)</UI_SAMPLE_TYPE>
+      <UI_SAMPLE_TYPE></UI_SAMPLE_TYPE>
       <UI_QUANT_TYPE></UI_QUANT_TYPE>
 
       <UI_SAMPLE_TYPE>TMT2plex ID-only (Peptide Labeled)</UI_SAMPLE_TYPE>
@@ -107,6 +117,7 @@
       <option value="Lys C + Glu C">Lys C + Glu C</option>
       <option value="Glu C + Asp N">Glu C + Asp N</option>
       <option value="Trypsin using MSIPI database">Trypsin using MSIPI database</option>
+      <option value="None">None</option>
     </param>
     <param type="select" name="instrument" label="Instrument">
       <option value="TripleTOF 5600">TripleTOF 5600</option>
--- a/proteinpilot_wrapper.py	Wed Dec 19 00:22:55 2012 -0500
+++ b/proteinpilot_wrapper.py	Fri May 10 18:04:15 2013 -0400
@@ -3,25 +3,23 @@
 import os
 import sys
 import tempfile
-import shutil 
 import subprocess
-import re
 import time
-from os.path import basename
+import shutil
 import logging
-
-assert sys.version_info[:2] >= ( 2, 6 )
+from xml.sax.saxutils import escape
 
 log = logging.getLogger(__name__)
 
 DEBUG = True
 
 working_directory = os.getcwd()
-tmp_stderr_name = tempfile.NamedTemporaryFile(dir = working_directory, suffix = '.stderr').name
-tmp_stdout_name = tempfile.NamedTemporaryFile(dir = working_directory, suffix = '.stdout').name
+tmp_stderr_name = tempfile.NamedTemporaryFile(dir=working_directory, suffix='.stderr').name
+tmp_stdout_name = tempfile.NamedTemporaryFile(dir=working_directory, suffix='.stdout').name
 
-def stop_err( msg ):
-    sys.stderr.write( "%s\n" % msg )
+
+def stop_err(msg):  # Report msg on stderr, then terminate the script.
+    sys.stderr.write("%s\n" % msg)  # NOTE(review): the bare sys.exit() that follows carries no status, so the process exits 0 despite the error
     sys.exit()
 
 
@@ -131,6 +129,40 @@
 }
 
 
+def parse_groups(inputs_file, group_parts=["group"], input_parts=["name", "path"]):  # Parse a "prefix:value" config file into per-group input lists; the list defaults are only read, never mutated.
+    inputs_lines = [line.strip() for line in open(inputs_file, "r").readlines()]  # NOTE(review): the file handle is never explicitly closed
+    inputs_lines = [line for line in inputs_lines if line and not line.startswith("#")]  # drop blank lines and "#" comment lines
+    cur_group = None  # name of the group being filled; stays None until a group line is seen
+    i = 0  # index of the next unprocessed line
+    group_prefixes = ["%s:" % group_part  for group_part in group_parts]
+    input_prefixes = ["%s:" % input_part for input_part in input_parts]
+    groups = {}  # group name -> {"group_data": {...}, "inputs": [...]}
+    while i < len(inputs_lines):
+        line = inputs_lines[i]
+        if line.startswith(group_prefixes[0]):
+            # Start new group
+            cur_group = line[len(group_prefixes[0]):]
+            group_data = {}
+            for j, group_prefix in enumerate(group_prefixes):  # one consecutive line per group part
+                group_line = inputs_lines[i + j]  # NOTE(review): no bounds check; a truncated record raises IndexError
+                group_data[group_parts[j]] = group_line[len(group_prefix):]  # strips by prefix length only; the prefix text itself is not verified
+            i += len(group_prefixes)
+        elif line.startswith(input_prefixes[0]):
+            input = []  # values in input_parts order; NOTE(review): shadows the builtin input
+            for j, input_prefix in enumerate(input_prefixes):  # one consecutive line per input part
+                part_line = inputs_lines[i + j]  # NOTE(review): no bounds check; a truncated record raises IndexError
+                part = part_line[len(input_prefixes[j]):]
+                input.append(part)
+            if cur_group not in groups:  # the group entry is created lazily, on its first input
+                groups[cur_group] = {"group_data": group_data, "inputs": []}  # NOTE(review): group_data is unbound if an input precedes any group line
+            groups[cur_group]["inputs"].append(input)
+            i += len(input_prefixes)
+        else:
+            # Unrecognized line (blank and "#" lines were already filtered out above): skip it
+            i += 1
+    return groups  # groups that never received an input are absent from the result
+
+
 def get_env_property(name, default):
     if name in os.environ:
         return os.environ[name]
@@ -238,9 +270,11 @@
     return (methods_name, methods_path, database_path)
 
 
-def setup_inputs(inputs, input_names):
+def setup_inputs(inputs):
     links = []
-    for input, input_name in zip(inputs, input_names):
+    for input_data in inputs:
+        input_name = input_data[0]
+        input = input_data[1]
         if DEBUG:
             print "Processing input %s with name %s and size %d" % (input, input_name, os.stat(input).st_size)
         if not input_name.upper().endswith(".MGF"):
@@ -248,7 +282,7 @@
         link_path = os.path.abspath(input_name)
         symlink(input, link_path)
         links.append(link_path)
-    return ",".join(["<DATA type=\"MGF\" filename=\"%s\" />" % link for link in links])
+    return ",".join(["<DATA type=\"MGF\" filename=\"%s\" />" % escape(link) for link in links])
 
 
 def get_unique_path(base, extension):
@@ -265,8 +299,7 @@
 
 def run_script():
     parser = optparse.OptionParser()
-    parser.add_option("--input", dest="input", action="append", default=[])
-    parser.add_option("--input_name", dest="input_name", action="append", default=[])
+    parser.add_option("--input_config")
     parser.add_option("--database")
     parser.add_option("--database_name")
     parser.add_option("--instrument")
@@ -297,8 +330,14 @@
     $inputs
     <RESULT filename="$output" />
 </PROTEINPILOTPARAMETERS>"""
+        input_config = options.input_config
+        group_data = parse_groups(input_config)
+        group_values = group_data.values()
+        # Not using groups right now.
+        assert len(group_values) == 1, len(group_values)
+        inputs = group_data.values()[0]["inputs"]
         input_parameters = {
-            "inputs": setup_inputs(options.input, options.input_name),
+            "inputs": setup_inputs(inputs),
             "output": group_file,
             "methods_name": methods_name
         }