diff tools/protein_analysis/signalp3.py @ 20:a19b3ded8f33 draft

v0.2.11 Job splitting fast-fail; RXLR tool supports HMMER2 from BioConda; Capture more version information; misc internal changes
author peterjc
date Thu, 21 Sep 2017 11:35:20 -0400
parents f3ecd80850e2
children 238eae32483c
line wrap: on
line diff
--- a/tools/protein_analysis/signalp3.py	Wed Feb 01 09:46:42 2017 -0500
+++ b/tools/protein_analysis/signalp3.py	Thu Sep 21 11:35:20 2017 -0400
@@ -52,16 +52,24 @@
 Finally, you can opt to have a GFF3 file produced which will describe the
 predicted signal peptide and mature peptide for each protein (using one of
 the predictors which gives a cleavage site). *WORK IN PROGRESS*
-"""
-import sys
+"""  # noqa: E501
+
+from __future__ import print_function
+
 import os
+import sys
 import tempfile
-from seq_analysis_utils import split_fasta, fasta_iterator
+
+from seq_analysis_utils import fasta_iterator, split_fasta
 from seq_analysis_utils import run_jobs, thread_count
 
 FASTA_CHUNK = 500
 MAX_LEN = 6000  # Found by trial and error
 
+if "-v" in sys.argv or "--version" in sys.argv:
+    print("SignalP Galaxy wrapper version 0.0.19")
+    sys.exit(os.system("signalp -version"))
+
 if len(sys.argv) not in [6, 8]:
     sys.exit("Require five (or 7) arguments, organism, truncate, threads, "
              "input protein FASTA file & output tabular file (plus "
@@ -96,15 +104,8 @@
 tmp_dir = tempfile.mkdtemp()
 
 
-def clean_tabular(raw_handle, out_handle, gff_handle=None, cut_method=None):
+def clean_tabular(raw_handle, out_handle, gff_handle=None):
     """Clean up SignalP output to make it tabular."""
-    if cut_method:
-        cut_col = {"NN_Cmax": 2,
-                   "NN_Ymax": 5,
-                   "NN_Smax": 8,
-                   "HMM_Cmax": 16}[cut_method]
-    else:
-        cut_col = None
     for line in raw_handle:
         if not line or line.startswith("#"):
             continue
@@ -119,6 +120,7 @@
 
 
 def make_gff(fasta_file, tabular_file, gff_file, cut_method):
+    """Make a GFF file."""
     cut_col, score_col = {"NN_Cmax": (2, 1),
                           "NN_Ymax": (5, 4),
                           "NN_Smax": (8, 7),
@@ -152,7 +154,7 @@
         assert 1 <= cut <= len(seq), "%i for %s len %i" % (cut, seqid, len(seq))
         score = parts[score_col]
         gff_handle.write("##sequence-region %s %i %i\n"
-                          % (seqid, 1, len(seq)))
+                         % (seqid, 1, len(seq)))
         # If the cut is at the very begining, there is no signal peptide!
         if cut > 1:
             # signal_peptide = SO:0000418
@@ -188,9 +190,10 @@
     except Exception:
         pass
 
+
 if len(jobs) > 1 and num_threads > 1:
     # A small "info" message for Galaxy to show the user.
-    print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs))
+    print("Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)))
 results = run_jobs(jobs, num_threads)
 assert len(fasta_files) == len(temp_files) == len(jobs)
 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs):
@@ -201,8 +204,11 @@
         output = "(no output)"
     if error_level or output.lower().startswith("error running"):
         clean_up(fasta_files + temp_files)
-        sys.exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
-                 error_level)
+        if output:
+            sys.stderr.write("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output))
+        else:
+            sys.stderr.write("One or more tasks failed, e.g. %i from %r with no output\n" % (error_level, cmd))
+        sys.exit(error_level)
 del results
 
 out_handle = open(tabular_file, "w")