diff tools/protein_analysis/psortb.py @ 19:f3ecd80850e2 draft

v0.2.9 Python style improvements
author peterjc
date Wed, 01 Feb 2017 09:46:42 -0500
parents eb6ac44d4b8e
children a19b3ded8f33
line wrap: on
line diff
--- a/tools/protein_analysis/psortb.py	Tue Sep 01 09:56:36 2015 -0400
+++ b/tools/protein_analysis/psortb.py	Wed Feb 01 09:46:42 2017 -0500
@@ -24,7 +24,7 @@
 import sys
 import os
 import tempfile
-from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count
+from seq_analysis_utils import split_fasta, run_jobs, thread_count
 
 FASTA_CHUNK = 500
 
@@ -33,7 +33,7 @@
     sys.exit(os.system("psort --version"))
 
 if len(sys.argv) != 8:
-    sys_exit("Require 7 arguments, number of threads (int), type (e.g. archaea), "
+    sys.exit("Require 7 arguments, number of threads (int), type (e.g. archaea), "
              "output (e.g. terse/normal/long), cutoff, divergent, input protein "
              "FASTA file & output tabular file")
 
@@ -56,10 +56,10 @@
 if out_type == "terse":
     header = ['SeqID', 'Localization', 'Score']
 elif out_type == "normal":
-    sys_exit("Normal output not implemented yet, sorry.")
+    sys.exit("Normal output not implemented yet, sorry.")
 elif out_type == "long":
     if org_type == "-n":
-        #Gram negative bacteria
+        # Gram negative bacteria
         header = ['SeqID', 'CMSVM-_Localization', 'CMSVM-_Details', 'CytoSVM-_Localization', 'CytoSVM-_Details',
                   'ECSVM-_Localization', 'ECSVM-_Details', 'ModHMM-_Localization', 'ModHMM-_Details',
                   'Motif-_Localization', 'Motif-_Details', 'OMPMotif-_Localization', 'OMPMotif-_Details',
@@ -71,7 +71,7 @@
                   'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score',
                   'Secondary_Localization', 'PSortb_Version']
     elif org_type == "-p":
-        #Gram positive bacteria
+        # Gram positive bacteria
         header = ['SeqID', 'CMSVM+_Localization', 'CMSVM+_Details', 'CWSVM+_Localization', 'CWSVM+_Details',
                   'CytoSVM+_Localization', 'CytoSVM+_Details', 'ECSVM+_Localization', 'ECSVM+_Details',
                   'ModHMM+_Localization', 'ModHMM+_Details', 'Motif+_Localization', 'Motif+_Details',
@@ -82,7 +82,7 @@
                   'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score',
                   'Secondary_Localization', 'PSortb_Version']
     elif org_type == "-a":
-        #Archaea
+        # Archaea
         header = ['SeqID', 'CMSVM_a_Localization', 'CMSVM_a_Details', 'CWSVM_a_Localization', 'CWSVM_a_Details',
                   'CytoSVM_a_Localization', 'CytoSVM_a_Details', 'ECSVM_a_Localization', 'ECSVM_a_Details',
                   'ModHMM_a_Localization', 'ModHMM_a_Details', 'Motif_a_Localization', 'Motif_a_Details',
@@ -93,27 +93,28 @@
                   'Extracellular_Score', 'Final_Localization', 'Final_Localization_Details', 'Final_Score',
                   'Secondary_Localization', 'PSortb_Version']
     else:
-        sys_exit("Expected -n, -p or -a for the organism type, not %r" % org_type)
+        sys.exit("Expected -n, -p or -a for the organism type, not %r" % org_type)
 else:
-    sys_exit("Expected terse, normal or long for the output type, not %r" % out_type)
+    sys.exit("Expected terse, normal or long for the output type, not %r" % out_type)
 
 tmp_dir = tempfile.mkdtemp()
 
+
 def clean_tabular(raw_handle, out_handle):
     """Clean up tabular TMHMM output, returns output line count."""
     global header
     count = 0
     for line in raw_handle:
         if not line.strip() or line.startswith("#"):
-            #Ignore any blank lines or comment lines
+            # Ignore any blank lines or comment lines
             continue
         parts = [x.strip() for x in line.rstrip("\r\n").split("\t")]
         if parts == header:
-            #Ignore the header line
+            # Ignore the header line
             continue
         if not parts[-1] and len(parts) == len(header) + 1:
-            #Ignore dummy blank extra column, e.g.
-            #"...2.0\t\tPSORTb version 3.0\t\n"
+            # Ignore dummy blank extra column, e.g.
+            # "...2.0\t\tPSORTb version 3.0\t\n"
             parts = parts[:-1]
         assert len(parts) == len(header), \
             "%i fields, not %i, in line:\n%r" % (len(line), len(header), line)
@@ -121,24 +122,25 @@
         count += 1
     return count
 
-#Note that if the input FASTA file contains no sequences,
-#split_fasta returns an empty list (i.e. zero temp files).
+# Note that if the input FASTA file contains no sequences,
+# split_fasta returns an empty list (i.e. zero temp files).
 fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK)
-temp_files = [f+".out" for f in fasta_files]
+temp_files = [f + ".out" for f in fasta_files]
 jobs = ["psort %s %s %s -o %s %s > %s" % (org_type, cutoff, divergent, out_type, fasta, temp)
         for fasta, temp in zip(fasta_files, temp_files)]
 
+
 def clean_up(file_list):
     for f in file_list:
         if os.path.isfile(f):
             os.remove(f)
     try:
         os.rmdir(tmp_dir)
-    except:
+    except OSError:  # best effort only: e.g. the directory is not empty or is already gone
         pass
 
 if len(jobs) > 1 and num_threads > 1:
-    #A small "info" message for Galaxy to show the user.
+    # A small "info" message for Galaxy to show the user.
     print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs))
 results = run_jobs(jobs, num_threads)
 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs):
@@ -149,7 +151,7 @@
         except IOError:
             output = ""
         clean_up(fasta_files + temp_files)
-        sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
-                 error_level)
+        sys.stderr.write("One or more tasks failed, e.g. %i from %r gave:\n%s\n" % (error_level, cmd, output))
+        sys.exit(error_level)  # sys.exit() takes a single argument, so pass only the exit status
 del results
 del jobs
@@ -163,7 +165,7 @@
     data_handle.close()
     if not count:
         clean_up(fasta_files + temp_files)
-        sys_exit("No output from psortb")
+        sys.exit("No output from psortb")
 out_handle.close()
 print "%i records" % count