tmhmm_and_signalp: tools/protein_analysis/tmhmm2.py comparison

comparison tools/protein_analysis/tmhmm2.py @ 19:f3ecd80850e2 draft

v0.2.9 Python style improvements

author	peterjc
date	Wed, 01 Feb 2017 09:46:42 -0500
parents	eb6ac44d4b8e
children	a19b3ded8f33

comparison

equal deleted inserted replaced

-:eb6ac44d4b8e
+:f3ecd80850e2
 In order to make it easier to use in Galaxy, this wrapper script simplifies
 this to remove the redundant tags, and instead adds a comment line at the
 top with the column names:
 #ID	len	ExpAA	First60	PredHel	Topology
 gi|2781234|pdb|1JLY|B	304	0.01	60	0.00	0	o
 gi|4959044|gb|AAD34209.1|AF069992_1	600	0.00	0	0.00	0	o
 gi|671626|emb|CAA85685.1|	473	0.19	0.00	0	o
 gi|3298468|dbj|BAA31520.1|	107	59.37	31.17	3	o23-45i52-74o89-106i
 when there is no output from tmhmm2, and raise an error.
 """
 import sys
 import os
 import tempfile
-from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count
+from seq_analysis_utils import split_fasta, run_jobs, thread_count
 FASTA_CHUNK = 500
 if len(sys.argv) != 4:
-sys_exit("Require three arguments, number of threads (int), input protein FASTA file & output tabular file")
+sys.exit("Require three arguments, number of threads (int), input protein FASTA file & output tabular file")
 num_threads = thread_count(sys.argv[1], default=4)
 fasta_file = sys.argv[2]
 tabular_file = sys.argv[3]
 tmp_dir = tempfile.mkdtemp()
 def clean_tabular(raw_handle, out_handle):
     """Clean up tabular TMHMM output, returns output line count.

     Each non-blank, non-comment line read from ``raw_handle`` must hold
     six tab separated columns: the identifier followed by five tagged
     values (``len=``, ``ExpAA=``, ``First60=``, ``PredHel=`` and
     ``Topology=``). The tags are removed and the six bare values are
     written to ``out_handle`` as one tab separated line.

     Arguments:
      - raw_handle - iterable of text lines (e.g. an open file handle).
      - out_handle - object with a ``write`` method taking a string.

     Returns the number of lines written to ``out_handle``.

     Calls ``sys.exit`` with a "Bad line" message if a line does not have
     exactly six columns, or if a column lacks its expected tag.
     """
     # Tags expected on columns two to six, in order.
     prefixes = ("len=", "ExpAA=", "First60=", "PredHel=", "Topology=")
     count = 0
     for line in raw_handle:
         if not line.strip() or line.startswith("#"):
             # Ignore any blank lines or comment lines
             continue
         parts = line.rstrip("\r\n").split("\t")
         if len(parts) != len(prefixes) + 1:
             sys.exit("Bad line: %r" % line)
         fields = [parts[0]]
         for prefix, value in zip(prefixes, parts[1:]):
             # Explicit check rather than assert, which python -O would
             # strip leaving the slice below to silently mangle the value.
             if not value.startswith(prefix):
                 sys.exit("Bad line: %r" % line)
             fields.append(value[len(prefix):])
         out_handle.write("\t".join(fields) + "\n")
         count += 1
     return count
-#Note that if the input FASTA file contains no sequences,
+# Note that if the input FASTA file contains no sequences,
-#split_fasta returns an empty list (i.e. zero temp files).
+# split_fasta returns an empty list (i.e. zero temp files).
 fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK)
-temp_files = [f+".out" for f in fasta_files]
+temp_files = [f + ".out" for f in fasta_files]
 jobs = ["tmhmm -short %s > %s" % (fasta, temp)
 for fasta, temp in zip(fasta_files, temp_files)]
 def clean_up(file_list):
     """Remove temp files, and if possible the temp directory.

     Every entry of ``file_list`` which is an existing regular file is
     deleted. Afterwards an attempt is made to remove the module level
     ``tmp_dir`` directory; failure to do so (e.g. because the directory
     is not empty) is deliberately ignored.
     """
     for f in file_list:
         if os.path.isfile(f):
             os.remove(f)
     try:
         os.rmdir(tmp_dir)
     except OSError:
         # Best effort only. Catching just OSError (what os.rmdir raises)
         # avoids masking unrelated programming errors.
         pass
 if len(jobs) > 1 and num_threads > 1:
     # A small "info" message for Galaxy to show the user.
     # Single parenthesised argument, so this prints identically under
     # both the Python 2 print statement and the Python 3 function.
     print("Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)))
 results = run_jobs(jobs, num_threads)
 # results is indexed by command string; a true value is treated as failure.
 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs):
     error_level = results[cmd]
     if error_level:
         # Show the first line of whatever the failed task wrote, if anything.
         try:
             with open(temp) as handle:
                 output = handle.readline()
         except IOError:
             output = ""
         clean_up(fasta_files + temp_files)
         # sys.exit() accepts at most one argument, so passing both a
         # message and a return code raises TypeError. Report the message
         # on stderr, then exit with the failed task's return code.
         sys.stderr.write("One or more tasks failed, e.g. %i from %r gave:\n%s\n"
                          % (error_level, cmd, output))
         sys.exit(error_level)
 del results
 del jobs
 out_handle = open(tabular_file, "w")
 data_handle = open(temp)
 count = clean_tabular(data_handle, out_handle)
 data_handle.close()
 if not count:
 clean_up(fasta_files + temp_files)
-sys_exit("No output from tmhmm2")
+sys.exit("No output from tmhmm2")
 out_handle.close()
 clean_up(fasta_files + temp_files)

Mercurial > repos > peterjc > tmhmm_and_signalp

comparison tools/protein_analysis/tmhmm2.py @ 19:f3ecd80850e2 draft