diff tools/protein_analysis/seq_analysis_utils.py @ 9:e52220a9ddad draft

Uploaded v0.1.2 Use the new <stdio> settings in the XML wrappers to catch errors. Obeys SGE style XNSLOTS environment variable for thread count (otherwise default to 4).
author peterjc
date Fri, 25 Jan 2013 06:08:31 -0500
parents 9b45a8743100
children e6cc27d182a8
line wrap: on
line diff
--- a/tools/protein_analysis/seq_analysis_utils.py	Mon Jul 30 12:56:54 2012 -0400
+++ b/tools/protein_analysis/seq_analysis_utils.py	Fri Jan 25 06:08:31 2013 -0500
@@ -19,6 +19,56 @@
     sys.stderr.write("%s\n" % msg)
     sys.exit(error_level)
 
+try:
+    from multiprocessing import cpu_count
+except ImportError:
+    #Must be under Python 2.5, this is copied from multiprocessing:
+    def cpu_count():
+        """Return the number of CPUs, or raise NotImplementedError."""
+        if sys.platform == 'win32':
+            try:
+                num = int(os.environ['NUMBER_OF_PROCESSORS'])
+            except (ValueError, KeyError):
+                num = 0
+        elif 'bsd' in sys.platform or sys.platform == 'darwin':
+            comm = '/sbin/sysctl -n hw.ncpu'
+            if sys.platform == 'darwin':
+                comm = '/usr' + comm
+            #Runs for both BSD and OS X, so num is always assigned here.
+            #NOTE(review): on Python 2.5 'with' needs a __future__ import
+            try:
+                with os.popen(comm) as p:
+                    num = int(p.read())
+            except ValueError:
+                num = 0
+        else:
+            try:
+                num = os.sysconf('SC_NPROCESSORS_ONLN')
+            except (ValueError, OSError, AttributeError):
+                num = 0
+        if num >= 1:
+            return num
+        raise NotImplementedError('cannot determine number of cpus')
+
+
+def thread_count(command_line_arg, default=1):
+    """Return the thread count to use, capped at the machine's CPU count.
+
+    Uses default if the argument is not an integer; stop_err if below one.
+    """
+    try:
+        num = int(command_line_arg)
+    except (TypeError, ValueError, OverflowError):
+        num = default
+    if num < 1:
+        stop_err("Threads argument %r is not a positive integer" % command_line_arg)
+    #Cap this with the physical limit of the machine,
+    try:
+        return min(num, cpu_count())
+    except NotImplementedError:
+        return num
+
+
 def fasta_iterator(filename, max_len=None, truncate=None):
     """Simple FASTA parser yielding tuples of (title, sequence) strings."""
     handle = open(filename)
@@ -109,6 +159,11 @@
     pending = jobs[:]
     running = []
     results = {}
+    if threads == 1:
+        #Special case this for speed, don't need the waits
+        for cmd in jobs:
+            results[cmd] = subprocess.call(cmd, shell=True)
+        return results
     while pending or running:
         #See if any have finished
         for (cmd, process) in running: