Galaxy |

Changeset 18:eb6ac44d4b8e (2015-09-01)

Previous changeset 17:e6cc27d182a8 (2014-11-21) Next changeset 19:f3ecd80850e2 (2017-02-01)

Commit message:
Suite v0.2.8, record Promoter 2 version + misc internal updates

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/README.rst
--- a/tools/protein_analysis/README.rst Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/README.rst Tue Sep 01 09:56:36 2015 -0400

@@ -14,7 +14,7 @@
To use these Galaxy wrappers you must first install the command line tools.
At the time of writing they are all free for academic use, or open source.

-These wrappers are copyright 2010-2013 by Peter Cock, James Hutton Institute
+These wrappers are copyright 2010-2015 by Peter Cock, James Hutton Institute
(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
Contributions/revisions copyright 2011 Konrad Paszkiewicz. All rights reserved.
See the included LICENCE file for details (MIT open source licence).
@@ -174,6 +174,10 @@
v0.2.6  - Use the new ``$GALAXY_SLOTS`` environment variable for thread count.
         - Updated the ``suite_config.xml`` file (overdue).
         - Tool definition now embeds citation information.
+v0.2.7  - Style cleanup in Python scripts.
+v0.2.8  - Reorder XML elements (internal change only).
+        - Planemo for Tool Shed upload (``.shed.yml``, internal change only).
+        - Record version of Promoter 2 via ``<version_command>``.
======= ======================================================================

@@ -187,10 +191,61 @@
Development has now moved to a dedicated GitHub repository:
https://github.com/peterjc/pico_galaxy/tree/master/tools

-For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use
-the following command from the Galaxy root folder::
+
+For pushing a release to the test or main "Galaxy Tool Shed", use the following
+Planemo commands (which requires you have set your Tool Shed access details in
+``~/.planemo.yml`` and that you have access rights on the Tool Shed)::
+
+    $ planemo shed_update -t testtoolshed --check_diff ~/repositories/pico_galaxy/tools/protein_analysis/
+    ...
+
+or::
+
+    $ planemo shed_update -t toolshed --check_diff ~/repositories/pico_galaxy/tools/protein_analysis/
+    ...
+
+To just build and check the tar ball, use::

-    $ ./tools/protein_analysis/make_tmhmm_and_signalp.sh
+    $ planemo shed_upload --tar_only  ~/repositories/pico_galaxy/tools/protein_analysis/
+    ...
+    $ tar -tzf shed_upload.tar.gz
+    test-data/Adenovirus.fasta
+    test-data/Adenovirus.promoter2.tabular
+    test-data/empty.fasta
+    test-data/empty_promoter2.tabular
+    test-data/empty_psortb_terse.tabular
+    test-data/empty_rxlr.Bhattacharjee2006.tabular
+    test-data/empty_rxlr.Whisson2007.tabular
+    test-data/empty_rxlr.Win2007.tabular
+    test-data/empty_signalp3.tabular
+    test-data/empty_tmhmm2.tabular
+    test-data/empty_wolf_psort.tabular
+    test-data/four_human_proteins.fasta
+    test-data/four_human_proteins.signalp3.tabular
+    test-data/four_human_proteins.tmhmm2.tabular
+    test-data/four_human_proteins.wolf_psort.tabular
+    test-data/k12_ten_proteins.fasta
+    test-data/k12_ten_proteins_psortb_p_terse.tabular
+    test-data/rxlr_win_et_al_2007.fasta
+    test-data/rxlr_win_et_al_2007.tabular
+    test-data/rxlr_win_et_al_2007_sp3.tabular
+    tools/protein_analysis/LICENSE.txt
+    tools/protein_analysis/README.rst
+    tools/protein_analysis/promoter2.py
+    tools/protein_analysis/promoter2.xml
+    tools/protein_analysis/psortb.py
+    tools/protein_analysis/psortb.xml
+    tools/protein_analysis/rxlr_motifs.py
+    tools/protein_analysis/rxlr_motifs.xml
+    tools/protein_analysis/seq_analysis_utils.py
+    tools/protein_analysis/signalp3.py
+    tools/protein_analysis/signalp3.xml
+    tools/protein_analysis/suite_config.xml
+    tools/protein_analysis/tmhmm2.py
+    tools/protein_analysis/tmhmm2.xml
+    tools/protein_analysis/whisson_et_al_rxlr_eer_cropped.hmm
+    tools/protein_analysis/wolf_psort.py
+    tools/protein_analysis/wolf_psort.xml

This simplifies ensuring a consistent set of files is bundled each time,
including all the relevant test files.

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/promoter2.py
--- a/tools/protein_analysis/promoter2.py Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/promoter2.py Tue Sep 01 09:56:36 2015 -0400

[

@@ -30,12 +30,15 @@
import os
import commands
import tempfile
-from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count
+from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count

FASTA_CHUNK = 500

+if "-v" in sys.argv or "--version" in sys.argv:
+    sys.exit(os.system("promoter -V"))
+
if len(sys.argv) != 4:
-    stop_err("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. "
+    sys_exit("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. "
              "Got %i arguments." % (len(sys.argv)-1))

num_threads = thread_count(sys.argv[3],default=4)
@@ -48,7 +51,7 @@
     platform = commands.getoutput("uname") #e.g. Linux
     shell_script = commands.getoutput("which promoter")
     if not os.path.isfile(shell_script):
-        stop_err("ERROR: Missing promoter executable shell script")
+        sys_exit("ERROR: Missing promoter executable shell script")
     path = None
     for line in open(shell_script):
         if line.startswith("setenv"): #could then be tab or space!
@@ -56,12 +59,12 @@
             if parts[0] == "setenv" and parts[1] == "PROM":
                 path = parts[2]
     if not path:
-        stop_err("ERROR: Could not find promoter path (PROM) in %r" % shell_script)
+        sys_exit("ERROR: Could not find promoter path (PROM) in %r" % shell_script)
     if not os.path.isdir(path):
-        stop_error("ERROR: %r is not a directory" % path)
+        sys_exit("ERROR: %r is not a directory" % path)
     bin = "%s/bin/promoter_%s" % (path, platform)
     if not os.path.isfile(bin):
-        stop_err("ERROR: Missing promoter binary %r" % bin)
+        sys_exit("ERROR: Missing promoter binary %r" % bin)
     return path, bin

def make_tabular(raw_handle, out_handle):
@@ -86,19 +89,19 @@
             except ValueError:
                 print "WARNING: Problem with line: %r" % line
                 continue
-                #stop_err("ERROR: Problem with line: %r" % line)
+                #sys_exit("ERROR: Problem with line: %r" % line)
             if likelihood not in ["ignored",
                                   "Marginal prediction",
                                   "Medium likely prediction",
                                   "Highly likely prediction"]:
-                stop_err("ERROR: Problem with line: %r" % line)
+                sys_exit("ERROR: Problem with line: %r" % line)
             out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood))
     return queries

working_dir, bin = get_path_and_binary()

if not os.path.isfile(fasta_file):
-   stop_err("ERROR: Missing input FASTA file %r" % fasta_file)
+   sys_exit("ERROR: Missing input FASTA file %r" % fasta_file)

#Note that if the input FASTA file contains no sequences,
#split_fasta returns an empty list (i.e. zero temp files).
@@ -133,7 +136,7 @@
         except IOError:
             output = ""
         clean_up(fasta_files + temp_files)
-        stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
+        sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
                  error_level)

del results
@@ -148,7 +151,7 @@
     data_handle.close()
     if not count:
         clean_up(fasta_files + temp_files)
-        stop_err("No output from promoter2")
+        sys_exit("No output from promoter2")
     queries += count
out_handle.close()

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/promoter2.xml
--- a/tools/protein_analysis/promoter2.xml Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/promoter2.xml Tue Sep 01 09:56:36 2015 -0400

@@ -1,27 +1,29 @@
-<tool id="promoter2" name="Promoter 2.0" version="0.0.8">
+<tool id="promoter2" name="Promoter 2.0" version="0.0.10">
     <description>Find eukaryotic PolII promoters in DNA sequences</description>
     
     
     <parallelism method="basic" split_inputs="fasta_file" split_mode="to_size" split_size="2000" merge_outputs="tabular_file"></parallelism>
+    <requirements>
+        <requirement type="binary">promoter</requirement>
+        <requirement type="package">promoter</requirement>
+    </requirements>
+    <stdio>
+        
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
+    <version_command interpreter="python">promoter2.py --version</version_command>
     <command interpreter="python">
         promoter2.py "\$GALAXY_SLOTS" "$fasta_file" "$tabular_file"
         ##If the environment variable isn't set, get "", and the python wrapper
         ##defaults to four threads.
     </command>
-    <stdio>
-        
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
     <inputs>
         <param name="fasta_file" type="data" format="fasta" label="FASTA file of DNA sequences"/>
     </inputs>
     <outputs>
         <data name="tabular_file" format="tabular" label="Promoter2 on ${fasta_file.name}" />
     </outputs>
-    <requirements>
-        <requirement type="binary">promoter</requirement>
-    </requirements>
     <tests>
         <test>
             <param name="fasta_file" value="Adenovirus.fasta" ftype="fasta"/>

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/psortb.py
--- a/tools/protein_analysis/psortb.py Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/psortb.py Tue Sep 01 09:56:36 2015 -0400

[

@@ -24,7 +24,7 @@
import sys
import os
import tempfile
-from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count
+from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count

FASTA_CHUNK = 500

@@ -33,7 +33,7 @@
     sys.exit(os.system("psort --version"))

if len(sys.argv) != 8:
-    stop_err("Require 7 arguments, number of threads (int), type (e.g. archaea), "
+    sys_exit("Require 7 arguments, number of threads (int), type (e.g. archaea), "
              "output (e.g. terse/normal/long), cutoff, divergent, input protein "
              "FASTA file & output tabular file")

@@ -56,7 +56,7 @@
if out_type == "terse":
     header = ['SeqID', 'Localization', 'Score']
elif out_type == "normal":
-    stop_err("Normal output not implemented yet, sorry.")
+    sys_exit("Normal output not implemented yet, sorry.")
elif out_type == "long":
     if org_type == "-n":
         #Gram negative bacteria
@@ -93,9 +93,9 @@
                   'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score',
                   'Secondary_Localization', 'PSortb_Version']
     else:
-        stop_err("Expected -n, -p or -a for the organism type, not %r" % org_type)
+        sys_exit("Expected -n, -p or -a for the organism type, not %r" % org_type)
else:
-    stop_err("Expected terse, normal or long for the output type, not %r" % out_type)
+    sys_exit("Expected terse, normal or long for the output type, not %r" % out_type)

tmp_dir = tempfile.mkdtemp()

@@ -149,7 +149,7 @@
         except IOError:
             output = ""
         clean_up(fasta_files + temp_files)
-        stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
+        sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
                  error_level)
del results
del jobs
@@ -163,7 +163,7 @@
     data_handle.close()
     if not count:
         clean_up(fasta_files + temp_files)
-        stop_err("No output from psortb")
+        sys_exit("No output from psortb")
out_handle.close()
print "%i records" % count

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/psortb.xml
--- a/tools/protein_analysis/psortb.xml Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/psortb.xml Tue Sep 01 09:56:36 2015 -0400

@@ -1,61 +1,62 @@
-<tool id="Psortb" name="psortb" version="0.0.5">
-  <description>Determines sub-cellular localisation of bacterial/archaeal protein sequences</description>
-  
-  
-  <parallelism method="basic" split_inputs="fasta_file" split_mode="to_size" split_size="2000" merge_outputs="tabular_file"></parallelism>
-  <version_command interpreter="python">psortb.py --version</version_command>
-  <command interpreter="python">
-    psortb.py "\$GALAXY_SLOTS" "$type" "$long" "$cutoff" "$divergent" "$sequence" "$outfile"
-    ##If the environment variable isn't set, get "", and python wrapper
-    ##defaults to four threads.
-  </command>
-  <stdio>
-    
-    <exit_code range="1:" />
-    <exit_code range=":-1" />
-  </stdio>
-  <inputs>
-    <param format="fasta" name="sequence" type="data"
-           label="Input sequences for which to predict localisation (protein FASTA format)" />
-    <param name="type" type="select"
-           label="Organism type (N.B. all sequences in the above file must be of the same type)" >
-      <option value="-p">Gram positive bacteria</option>
-      <option value="-n">Gram negative bacteria</option>
-      <option value="-a">Archaea</option>
-    </param>
-    <param name="long" type="select" label="Output type">
-      <option value="terse">Short (terse, tabular with 3 columns)</option>
-      
-      <option value="long">Long (verbose, tabular with about 30 columns, depending on organism type)</option>
-    </param>
-    <param name="cutoff" size="10" type="float" optional="true" value=""
-           label="Sets a cutoff value for reported results (e.g. 7.5)"
-           help="Leave blank or use zero for no cutoff." />
-    <param name="divergent" size="10" type="float" optional="true" value=""
-           label="Sets a cutoff value for the multiple localization flag (e.g. 4.5)"
-           help="Leave blank or use zero for no cutoff." />
-  </inputs>
-  <outputs>
-    <data format="tabular" name="outfile" />
-  </outputs>
-  <requirements>
-    <requirement type="binary">psort</requirement>
-  </requirements>
-  <tests>
-    <test>
-      <param name="sequence" value="empty.fasta" ftype="fasta"/>
-      <param name="long" value="terse"/>
-      <output name="outfile" file="empty_psortb_terse.tabular" ftype="tabular"/>
-    </test>
-    <test>
-      <param name="sequence" value="k12_ten_proteins.fasta" ftype="fasta"/>
-      <param name="long" value="terse"/>
-      <output name="outfile" file="k12_ten_proteins_psortb_p_terse.tabular" ftype="tabular"/>
-    </test>
-  </tests>
-  <help>
+<tool id="Psortb" name="psortb" version="0.0.7">
+    <description>Determines sub-cellular localisation of bacterial/archaeal protein sequences</description>
+    
+    
+    <parallelism method="basic" split_inputs="fasta_file" split_mode="to_size" split_size="2000" merge_outputs="tabular_file"></parallelism>
+    <requirements>
+        <requirement type="binary">psort</requirement>
+        <requirement type="package">psort</requirement>
+    </requirements>
+    <stdio>
+        
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
+    <version_command interpreter="python">psortb.py --version</version_command>
+    <command interpreter="python">
+psortb.py "\$GALAXY_SLOTS" "$type" "$long" "$cutoff" "$divergent" "$sequence" "$outfile"
+##If the environment variable isn't set, get "", and python wrapper
+##defaults to four threads.
+    </command>
+    <inputs>
+        <param format="fasta" name="sequence" type="data"
+               label="Input sequences for which to predict localisation (protein FASTA format)" />
+        <param name="type" type="select"
+               label="Organism type (N.B. all sequences in the above file must be of the same type)" >
+            <option value="-p">Gram positive bacteria</option>
+            <option value="-n">Gram negative bacteria</option>
+            <option value="-a">Archaea</option>
+        </param>
+        <param name="long" type="select" label="Output type">
+            <option value="terse">Short (terse, tabular with 3 columns)</option>
+            
+            <option value="long">Long (verbose, tabular with about 30 columns, depending on organism type)</option>
+        </param>
+        <param name="cutoff" size="10" type="float" optional="true" value=""
+               label="Sets a cutoff value for reported results (e.g. 7.5)"
+               help="Leave blank or use zero for no cutoff." />
+        <param name="divergent" size="10" type="float" optional="true" value=""
+               label="Sets a cutoff value for the multiple localization flag (e.g. 4.5)"
+               help="Leave blank or use zero for no cutoff." />
+    </inputs>
+    <outputs>
+        <data format="tabular" name="outfile" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="sequence" value="empty.fasta" ftype="fasta"/>
+            <param name="long" value="terse"/>
+            <output name="outfile" file="empty_psortb_terse.tabular" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="sequence" value="k12_ten_proteins.fasta" ftype="fasta"/>
+            <param name="long" value="terse"/>
+            <output name="outfile" file="k12_ten_proteins_psortb_p_terse.tabular" ftype="tabular"/>
+        </test>
+    </tests>
+    <help>

**What it does**

@@ -99,9 +100,9 @@

This wrapper is available to install into other Galaxy Instances via the Galaxy
Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/tmhmm_and_signalp
+    </help>
     <citations>
         <citation type="doi">10.7717/peerj.167</citation>
         <citation type="doi">10.1093/bioinformatics/btq249</citation>
     </citations>
-  </help>
</tool>

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/rxlr_motifs.py
--- a/tools/protein_analysis/rxlr_motifs.py Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/rxlr_motifs.py Tue Sep 01 09:56:36 2015 -0400

[

b'@@ -40,10 +40,14 @@\n import sys\n import re\n import subprocess\n-from seq_analysis_utils import stop_err, fasta_iterator\n+from seq_analysis_utils import sys_exit, fasta_iterator\n+\n+if "-v" in sys.argv:\n+ print("RXLR Motifs v0.0.10")\n+ sys.exit(0)\n \n if len(sys.argv) != 5:\n- stop_err("Requires four arguments: protein FASTA filename, threads, model, and output filename")\n+ sys_exit("Requires four arguments: protein FASTA filename, threads, model, and output filename")\n \n fasta_file, threads, model, tabular_file = sys.argv[1:]\n hmm_output_file = tabular_file + ".hmm.tmp"\n@@ -53,36 +57,36 @@\n hmmer_search = "hmmsearch2"\n \n if model == "Bhattacharjee2006":\n- signalp_trunc = 70\n- re_rxlr = re.compile("R.LR")\n- min_sp = 10\n- max_sp = 40\n- max_sp_rxlr = 100\n- min_rxlr_start = 1\n- #Allow signal peptide to be at most 40aa, and want RXLR to be\n- #within 100aa, therefore for the prescreen the max start is 140:\n- max_rxlr_start = max_sp + max_sp_rxlr\n+ signalp_trunc = 70\n+ re_rxlr = re.compile("R.LR")\n+ min_sp = 10\n+ max_sp = 40\n+ max_sp_rxlr = 100\n+ min_rxlr_start = 1\n+ # Allow signal peptide to be at most 40aa, and want RXLR to be\n+ # within 100aa, therefore for the prescreen the max start is 140:\n+ max_rxlr_start = max_sp + max_sp_rxlr\n elif model == "Win2007":\n- signalp_trunc = 70\n- re_rxlr = re.compile("R.LR")\n- min_sp = 10\n- max_sp = 40\n- min_rxlr_start = 30\n- max_rxlr_start = 60\n- #No explicit limit on separation of signal peptide clevage\n- #and RXLR, but shortest signal peptide is 10, and furthest\n- #away RXLR is 60, so effectively limit is 50.\n- max_sp_rxlr = max_rxlr_start - min_sp + 1\n+ signalp_trunc = 70\n+ re_rxlr = re.compile("R.LR")\n+ min_sp = 10\n+ max_sp = 40\n+ min_rxlr_start = 30\n+ max_rxlr_start = 60\n+ # No explicit limit on separation of signal peptide clevage\n+ # and RXLR, but shortest signal peptide is 10, and furthest\n+ # away RXLR is 60, so effectively limit is 50.\n+ max_sp_rxlr = max_rxlr_start - 
min_sp + 1\n elif model == "Whisson2007":\n- signalp_trunc = 0 #zero for no truncation\n- re_rxlr = re.compile("R.LR.{,40}[ED][ED][KR]")\n- min_sp = 10\n- max_sp = 40\n- max_sp_rxlr = 100\n- min_rxlr_start = 1\n- max_rxlr_start = max_sp + max_sp_rxlr\n+ signalp_trunc = 0 # zero for no truncation\n+ re_rxlr = re.compile("R.LR.{,40}[ED][ED][KR]")\n+ min_sp = 10\n+ max_sp = 40\n+ max_sp_rxlr = 100\n+ min_rxlr_start = 1\n+ max_rxlr_start = max_sp + max_sp_rxlr\n else:\n- stop_err("Did not recognise the model name %r\\n"\n+ sys_exit("Did not recognise the model name %r\\n"\n "Use Bhattacharjee2006, Win2007, or Whisson2007" % model)\n \n \n@@ -108,49 +112,49 @@\n hmm_file = os.path.join(os.path.split(sys.argv[0])[0],\n "whisson_et_al_rxlr_eer_cropped.hmm")\n if not os.path.isfile(hmm_file):\n- stop_err("Missing HMM file for Whisson et al. (2007)")\n+ sys_exit("Missing HMM file for Whisson et al. (2007)")\n if not get_hmmer_version(hmmer_search, "HMMER 2.3.2 (Oct 2003)"):\n- stop_err("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_searcher)\n+ sys_exit("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_search)\n \n hmm_hits = set()\n valid_ids = set()\n for title, seq in fasta_iterator(fasta_file):\n name = title.split(None,1)[0]\n if name in valid_ids:\n- stop_err("Duplicated identifier %r" % name)\n+ sys_exit("Duplicated identifier %r" % name)\n else:\n valid_ids.add(name)\n if not valid_ids:\n- #Special case, don\'t need to run HMMER if there are no sequences\n+ # Special case, don\'t need to run HMMER if there are no sequences\n pass\n else:\n- #I\'ve left the code to handle HMMER 3 in situ, in case\n- #we revisit the choice to insist on HMMER 2.\n+ # I\'ve left the code to handle HMMER 3 in situ, in case\n+ # we revisit the choice to '..b' # Might as well truncate the sequence now, makes the temp file smaller\n if signalp_trunc:\n handle.write(">%s (truncated)\\n%s\\n" % (name, seq[:signalp_trunc]))\n else:\n- #Does it matter we don\'t line wrap?\n+ # Does it 
matter we don\'t line wrap?\n handle.write(">%s\\n%s\\n" % (name, seq))\n count += 1\n handle.close()\n-#print "Running SignalP on %i/%i potentials." % (count, total)\n+# print "Running SignalP on %i/%i potentials." % (count, total)\n \n \n-#Run SignalP (using our wrapper script to get multi-core support etc)\n+# Run SignalP (using our wrapper script to get multi-core support etc)\n signalp_script = os.path.join(os.path.split(sys.argv[0])[0], "signalp3.py")\n if not os.path.isfile(signalp_script):\n- stop_err("Error - missing signalp3.py script")\n+ sys_exit("Error - missing signalp3.py script")\n cmd = "python %s euk %i %s %s %s" % (signalp_script, signalp_trunc, threads, signalp_input_file, signalp_output_file)\n return_code = os.system(cmd)\n if return_code:\n- stop_err("Error %i from SignalP:\\n%s" % (return_code, cmd))\n-#print "SignalP done"\n+ sys_exit("Error %i from SignalP:\\n%s" % (return_code, cmd))\n+# print "SignalP done"\n+\n \n def parse_signalp(filename):\n """Parse SignalP output, yield tuples of ID, HMM_Sprob_score and NN predicted signal peptide length.\n@@ -217,7 +222,7 @@\n handle.close()\n \n \n-#Parse SignalP results and apply the strict RXLR criteria\n+# Parse SignalP results and apply the strict RXLR criteria\n total = 0\n tally = dict()\n handle = open(tabular_file, "w")\n@@ -230,26 +235,26 @@\n match = re_rxlr.search(seq[min_rxlr_start-1:].upper())\n if match and min_rxlr_start - 1 + match.start() + 1 <= max_rxlr_start:\n del match\n- #This was the criteria for calling SignalP,\n+ # This was the criteria for calling SignalP,\n #so it will be in the SignalP results.\n sp_id, sp_hmm_score, sp_nn_len = signalp_results.next()\n assert name == sp_id, "%s vs %s" % (name, sp_id)\n if sp_hmm_score >= min_signalp_hmm and min_sp <= sp_nn_len <= max_sp:\n match = re_rxlr.search(seq[sp_nn_len:].upper())\n- if match and match.start() + 1 <= max_sp_rxlr: #1-based counting\n+ if match and match.start() + 1 <= max_sp_rxlr: # 1-based counting\n rxlr_start 
= sp_nn_len + match.start() + 1\n if min_rxlr_start <= rxlr_start <= max_rxlr_start:\n rxlr = "Y"\n if model == "Whisson2007":\n- #Combine the signalp with regular expression heuristic and the HMM\n+ # Combine the signalp with regular expression heuristic and the HMM\n if name in hmm_hits and rxlr == "N":\n- rxlr = "hmm" #HMM only\n+ rxlr = "hmm" # HMM only\n elif rxlr == "N":\n- rxlr = "neither" #Don\'t use N (no)\n+ rxlr = "neither" # Don\'t use N (no)\n elif name not in hmm_hits and rxlr == "Y":\n- rxlr = "re" #Heuristic only\n- #Now have a four way classifier: Y, hmm, re, neither\n- #and count is the number of Y results (both HMM and heuristic)\n+ rxlr = "re" # Heuristic only\n+ # Now have a four way classifier: Y, hmm, re, neither\n+ # and count is the number of Y results (both HMM and heuristic)\n handle.write("%s\\t%s\\n" % (name, rxlr))\n try:\n tally[rxlr] += 1\n@@ -258,17 +263,17 @@\n handle.close()\n assert sum(tally.values()) == total\n \n-#Check the iterator is finished\n+# Check the iterator is finished\n try:\n signalp_results.next()\n assert False, "Unexpected data in SignalP output"\n except StopIteration:\n pass\n \n-#Cleanup\n+# Cleanup\n os.remove(signalp_input_file)\n os.remove(signalp_output_file)\n \n-#Short summary to stdout for Galaxy\'s info display\n+# Short summary to stdout for Galaxy\'s info display\n print "%s for %i sequences:" % (model, total)\n print ", ".join("%s = %i" % kv for kv in sorted(tally.iteritems()))\n'

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/rxlr_motifs.xml
--- a/tools/protein_analysis/rxlr_motifs.xml Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/rxlr_motifs.xml Tue Sep 01 09:56:36 2015 -0400

@@ -1,13 +1,22 @@
-<tool id="rxlr_motifs" name="RXLR Motifs" version="0.0.9">
+<tool id="rxlr_motifs" name="RXLR Motifs" version="0.0.11">
     <description>Find RXLR Effectors of Plant Pathogenic Oomycetes</description>
-    <command interpreter="python">
-      rxlr_motifs.py "$fasta_file" "\$GALAXY_SLOTS" $model "$tabular_file"
-    </command>
+    <requirements>
+        
+        <requirement type="binary">signalp</requirement>
+        <requirement type="package">signalp</requirement>
+        
+        <requirement type="binary">hmmsearch</requirement>
+        <requirement type="package">hmmsearch</requirement>
+    </requirements>
     <stdio>
         
         <exit_code range="1:" />
         <exit_code range=":-1" />
     </stdio>
+    <version_command interpreter="python">rxlr_motifs.py -v</version_command>
+    <command interpreter="python">
+      rxlr_motifs.py "$fasta_file" "\$GALAXY_SLOTS" $model "$tabular_file"
+    </command>
     <inputs>
         <param name="fasta_file" type="data" format="fasta" label="FASTA file of protein sequences" />
         <param name="model" type="select" label="Which RXLR model?">
@@ -19,12 +28,6 @@
     <outputs>
         <data name="tabular_file" format="tabular" label="$model.value_label" />
     </outputs>
-    <requirements>
-        
-        <requirement type="binary">signalp</requirement>
-        
-        <requirement type="binary">hmmsearch</requirement>
-    </requirements>
     <tests>
         <test>
             <param name="fasta_file" value="rxlr_win_et_al_2007.fasta" ftype="fasta" />

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/seq_analysis_utils.py
--- a/tools/protein_analysis/seq_analysis_utils.py Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/seq_analysis_utils.py Tue Sep 01 09:56:36 2015 -0400

@@ -14,7 +14,7 @@

__version__ = "0.0.1"

-def stop_err(msg, error_level=1):
+def sys_exit(msg, error_level=1):
     """Print error message to stdout and quit with given error level."""
     sys.stderr.write("%s\n" % msg)
     sys.exit(error_level)
@@ -57,7 +57,7 @@
     except:
         num = default
     if num < 1:
-        stop_err("Threads argument %r is not a positive integer" % command_line_arg)
+        sys_exit("Threads argument %r is not a positive integer" % command_line_arg)
     #Cap this with the pysical limit of the machine,
     try:
         num = min(num, cpu_count())

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/signalp3.py
--- a/tools/protein_analysis/signalp3.py Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/signalp3.py Tue Sep 01 09:56:36 2015 -0400

[

@@ -56,28 +56,28 @@
import sys
import os
import tempfile
-from seq_analysis_utils import stop_err, split_fasta, fasta_iterator
+from seq_analysis_utils import sys_exit, split_fasta, fasta_iterator
from seq_analysis_utils import run_jobs, thread_count

FASTA_CHUNK = 500
MAX_LEN = 6000 #Found by trial and error

if len(sys.argv) not in  [6,8]:
-    stop_err("Require five (or 7) arguments, organism, truncate, threads, "
+    sys_exit("Require five (or 7) arguments, organism, truncate, threads, "
              "input protein FASTA file & output tabular file (plus "
              "optionally cut method and GFF3 output file). "
              "Got %i arguments." % (len(sys.argv)-1))

organism = sys.argv[1]
if organism not in ["euk", "gram+", "gram-"]:
-    stop_err("Organism argument %s is not one of euk, gram+ or gram-" % organism)
+    sys_exit("Organism argument %s is not one of euk, gram+ or gram-" % organism)

try:
     truncate = int(sys.argv[2])
except:
     truncate = 0
if truncate < 0:
-    stop_err("Truncate argument %s is not a positive integer (or zero)" % sys.argv[2])
+    sys_exit("Truncate argument %s is not a positive integer (or zero)" % sys.argv[2])

num_threads = thread_count(sys.argv[3], default=4)
fasta_file = sys.argv[4]
@@ -86,7 +86,7 @@
if len(sys.argv) == 8:
     cut_method = sys.argv[6]
     if cut_method not in ["NN_Cmax", "NN_Ymax", "NN_Smax", "HMM_Cmax"]:
-        stop_err("Invalid cut method %r" % cut_method)
+        sys_exit("Invalid cut method %r" % cut_method)
     gff3_file = sys.argv[7]
else:
     cut_method = None
@@ -197,7 +197,7 @@
         output = "(no output)"
     if error_level or output.lower().startswith("error running"):
         clean_up(fasta_files + temp_files)
-        stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
+        sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
                  error_level)
del results

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/signalp3.xml
--- a/tools/protein_analysis/signalp3.xml Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/signalp3.xml Tue Sep 01 09:56:36 2015 -0400

@@ -1,18 +1,22 @@
-<tool id="signalp3" name="SignalP 3.0" version="0.0.14">
+<tool id="signalp3" name="SignalP 3.0" version="0.0.15">
     <description>Find signal peptides in protein sequences</description>
     
     
     <parallelism method="basic" split_inputs="fasta_file" split_mode="to_size" split_size="2000" merge_outputs="tabular_file"></parallelism>
+    <requirements>
+        <requirement type="binary">signalp</requirement>
+        <requirement type="package">signalp</requirement>
+    </requirements>
+    <stdio>
+        
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
     <command interpreter="python">
       signalp3.py $organism $truncate "\$GALAXY_SLOTS" $fasta_file $tabular_file
       ##If the environment variable isn't set, get "", and the python wrapper
       ##defaults to four threads.
     </command>
-    <stdio>
-        
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
     <inputs>
         <param name="fasta_file" type="data" format="fasta" label="FASTA file of protein sequences"/>
         <param name="organism" type="select" display="radio" label="Organism">
@@ -27,9 +31,6 @@
     <outputs>
         <data name="tabular_file" format="tabular" label="SignalP $organism results" />
     </outputs>
-    <requirements>
-        <requirement type="binary">signalp</requirement>
-    </requirements>
     <tests>
         <test>
             <param name="fasta_file" value="four_human_proteins.fasta" ftype="fasta"/>

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/suite_config.xml
--- a/tools/protein_analysis/suite_config.xml Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/suite_config.xml Tue Sep 01 09:56:36 2015 -0400

@@ -1,21 +1,21 @@
-    <suite id="tmhmm_and_signalp" name="Protein/gene sequence analysis tools" version="0.2.6">
+    <suite id="tmhmm_and_signalp" name="Protein/gene sequence analysis tools" version="0.2.8">
         <description>TMHMM, SignalP, RXLR motifs, WoLF PSORT</description>
-        <tool id="tmhmm2" name="TMHMM 2.0" version="0.0.12">
+        <tool id="tmhmm2" name="TMHMM 2.0" version="0.0.14">
             <description>Find transmembrane domains in protein sequences</description>
         </tool>
-        <tool id="signalp3" name="SignalP 3.0" version="0.0.13">
+        <tool id="signalp3" name="SignalP 3.0" version="0.0.15">
             <description>Find signal peptides in protein sequences</description>
         </tool>
-        <tool id="promoter2" name="Promoter 2.0" version="0.0.7">
+        <tool id="promoter2" name="Promoter 2.0" version="0.0.9">
             <description>Find eukaryotic PolII promoters in DNA sequences</description>
         </tool>
-        <tool id="psortb" name="PSORTb" version="0.0.4">
+        <tool id="psortb" name="PSORTb" version="0.0.6">
             <description>Bacteria/archaea protein subcellular localization prediction</description>
         </tool>
-        <tool id="wolf_psort" name="WoLF PSORT" version="0.0.7">
+        <tool id="wolf_psort" name="WoLF PSORT" version="0.0.9">
             <description>Eukaryote protein subcellular localization prediction</description>
         </tool>
-        <tool id="rxlr_motifs" name="RXLR Motifs" version="0.0.8">
+        <tool id="rxlr_motifs" name="RXLR Motifs" version="0.0.11">
             <description>Find RXLR Effectors of Plant Pathogenic Oomycetes</description>
         </tool>
     </suite>

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/tmhmm2.py
--- a/tools/protein_analysis/tmhmm2.py Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/tmhmm2.py Tue Sep 01 09:56:36 2015 -0400

[

@@ -43,12 +43,12 @@
import sys
import os
import tempfile
-from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count
+from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count

FASTA_CHUNK = 500

if len(sys.argv) != 4:
-    stop_err("Require three arguments, number of threads (int), input protein FASTA file & output tabular file")
+    sys_exit("Require three arguments, number of threads (int), input protein FASTA file & output tabular file")

num_threads = thread_count(sys.argv[1], default=4)
fasta_file = sys.argv[2]
@@ -68,7 +68,7 @@
             identifier, length, expAA, first60, predhel, topology = parts
         except:
             assert len(parts)!=6
-            stop_err("Bad line: %r" % line)
+            sys_exit("Bad line: %r" % line)
         assert length.startswith("len="), line
         length = length[4:]
         assert expAA.startswith("ExpAA="), line
@@ -112,7 +112,7 @@
         except IOError:
             output = ""
         clean_up(fasta_files + temp_files)
-        stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
+        sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
                  error_level)
del results
del jobs
@@ -125,7 +125,7 @@
     data_handle.close()
     if not count:
         clean_up(fasta_files + temp_files)
-        stop_err("No output from tmhmm2")
+        sys_exit("No output from tmhmm2")
out_handle.close()

clean_up(fasta_files + temp_files)

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/tmhmm2.xml
--- a/tools/protein_analysis/tmhmm2.xml Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/tmhmm2.xml Tue Sep 01 09:56:36 2015 -0400

@@ -1,18 +1,22 @@
-<tool id="tmhmm2" name="TMHMM 2.0" version="0.0.13">
+<tool id="tmhmm2" name="TMHMM 2.0" version="0.0.14">
     <description>Find transmembrane domains in protein sequences</description>
     
     
     <parallelism method="basic" split_inputs="fasta_file" split_mode="to_size" split_size="2000" merge_outputs="tabular_file"></parallelism>
+    <requirements>
+        <requirement type="binary">tmhmm</requirement>
+        <requirement type="package">tmhmm</requirement>
+    </requirements>
+    <stdio>
+        
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
     <command interpreter="python">
       tmhmm2.py "\$GALAXY_SLOTS" $fasta_file $tabular_file
       ##If the environment variable isn't set, get "", and the python wrapper
       ##defaults to four threads.
     </command>
-    <stdio>
-        
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
     <inputs>
         <param name="fasta_file" type="data" format="fasta" label="FASTA file of protein sequences"/>
         <!--
@@ -25,9 +29,6 @@
     <outputs>
         <data name="tabular_file" format="tabular" label="TMHMM results" />
     </outputs>
-    <requirements>
-        <requirement type="binary">tmhmm</requirement>
-    </requirements>
     <tests>
         <test>
             <param name="fasta_file" value="four_human_proteins.fasta" ftype="fasta"/>

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/wolf_psort.py
--- a/tools/protein_analysis/wolf_psort.py Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/wolf_psort.py Tue Sep 01 09:56:36 2015 -0400

[

@@ -35,7 +35,7 @@
"""
import sys
import os
-from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count
+from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count

FASTA_CHUNK = 500
exe = "runWolfPsortSummary"
@@ -59,11 +59,11 @@
"""

if len(sys.argv) != 5:
-    stop_err("Require four arguments, organism, threads, input protein FASTA file & output tabular file")
+    sys_exit("Require four arguments, organism, threads, input protein FASTA file & output tabular file")

organism = sys.argv[1]
if organism not in ["animal", "plant", "fungi"]:
-    stop_err("Organism argument %s is not one of animal, plant, fungi" % organism)
+    sys_exit("Organism argument %s is not one of animal, plant, fungi" % organism)

num_threads = thread_count(sys.argv[2], default=4)
fasta_file = sys.argv[3]
@@ -106,7 +106,7 @@
     if error_level or output.lower().startswith("error running"):
         clean_up(fasta_files)
         clean_up(temp_files)
-        stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
+        sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
                  error_level)
del results

diff -r e6cc27d182a8 -r eb6ac44d4b8e tools/protein_analysis/wolf_psort.xml
--- a/tools/protein_analysis/wolf_psort.xml Fri Nov 21 08:19:09 2014 -0500
+++ b/tools/protein_analysis/wolf_psort.xml Tue Sep 01 09:56:36 2015 -0400

@@ -1,15 +1,19 @@
-<tool id="wolf_psort" name="WoLF PSORT" version="0.0.8">
+<tool id="wolf_psort" name="WoLF PSORT" version="0.0.9">
     <description>Eukaryote protein subcellular localization prediction</description>
+    <requirements>
+        <requirement type="binary">runWolfPsortSummary</requirement>
+        <requirement type="binary">psort</requirement>
+    </requirements>
+    <stdio>
+        
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
     <command interpreter="python">
       wolf_psort.py $organism "\$GALAXY_SLOTS" "$fasta_file" "$tabular_file"
       ##If the environment variable isn't set, get "", and python wrapper
       ##defaults to four threads.
     </command>
-    <stdio>
-        
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
     <inputs>
         <param name="fasta_file" type="data" format="fasta" label="FASTA file of protein sequences"/>
         <param name="organism" type="select" display="radio" label="Organism">
@@ -21,9 +25,6 @@
     <outputs>
         <data name="tabular_file" format="tabular" label="WoLF PSORT $organism results" />
     </outputs>
-    <requirements>
-        <requirement type="binary">runWolfPsortSummary</requirement>
-    </requirements>
     <tests>
         <test>
             <param name="fasta_file" value="four_human_proteins.fasta" ftype="fasta"/>