comparison tools/protein_analysis/tmhmm2.py @ 21:238eae32483c draft

"Check this is up to date with all 2020 changes (black etc)"
author peterjc
date Thu, 17 Jun 2021 08:21:06 +0000
parents a19b3ded8f33
children e1996f0f4e85
comparison
equal deleted inserted replaced
20:a19b3ded8f33 21:238eae32483c
37 itself (see the SignalP XML file for settings). 37 itself (see the SignalP XML file for settings).
38 38
39 Also tmhmm2 can fail without returning an error code, for example if run on a 39 Also tmhmm2 can fail without returning an error code, for example if run on a
40 64 bit machine with only the 32 bit binaries installed. This script will spot 40 64 bit machine with only the 32 bit binaries installed. This script will spot
41 when there is no output from tmhmm2, and raise an error. 41 when there is no output from tmhmm2, and raise an error.
42 """ 42 """ # noqa: E501
43 43
44 from __future__ import print_function 44 from __future__ import print_function
45 45
46 import os 46 import os
47 import sys 47 import sys
53 53
54 if "-v" in sys.argv or "--version" in sys.argv: 54 if "-v" in sys.argv or "--version" in sys.argv:
55 sys.exit("TMHMM wrapper version 0.0.16") 55 sys.exit("TMHMM wrapper version 0.0.16")
56 56
57 if len(sys.argv) != 4: 57 if len(sys.argv) != 4:
58 sys.exit("Require three arguments, number of threads (int), input protein FASTA file & output tabular file") 58 sys.exit(
59 "Require three arguments, number of threads (int), input protein "
60 "FASTA file & output tabular file"
61 )
59 62
60 num_threads = thread_count(sys.argv[1], default=4) 63 num_threads = thread_count(sys.argv[1], default=4)
61 fasta_file = sys.argv[2] 64 fasta_file = sys.argv[2]
62 tabular_file = sys.argv[3] 65 tabular_file = sys.argv[3]
63 66
85 first60 = first60[8:] 88 first60 = first60[8:]
86 assert predhel.startswith("PredHel="), line 89 assert predhel.startswith("PredHel="), line
87 predhel = predhel[8:] 90 predhel = predhel[8:]
88 assert topology.startswith("Topology="), line 91 assert topology.startswith("Topology="), line
89 topology = topology[9:] 92 topology = topology[9:]
90 out_handle.write("%s\t%s\t%s\t%s\t%s\t%s\n" 93 out_handle.write(
91 % (identifier, length, exp_aa, first60, predhel, topology)) 94 "%s\t%s\t%s\t%s\t%s\t%s\n"
95 % (identifier, length, exp_aa, first60, predhel, topology)
96 )
92 count += 1 97 count += 1
93 return count 98 return count
94 99
95 100
96 # Note that if the input FASTA file contains no sequences, 101 # Note that if the input FASTA file contains no sequences,
97 # split_fasta returns an empty list (i.e. zero temp files). 102 # split_fasta returns an empty list (i.e. zero temp files).
98 fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK) 103 fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK)
99 temp_files = [f + ".out" for f in fasta_files] 104 temp_files = [f + ".out" for f in fasta_files]
100 jobs = ["tmhmm -short %s > %s" % (fasta, temp) 105 jobs = [
101 for fasta, temp in zip(fasta_files, temp_files)] 106 "tmhmm -short %s > %s" % (fasta, temp)
107 for fasta, temp in zip(fasta_files, temp_files)
108 ]
102 109
103 110
104 def clean_up(file_list): 111 def clean_up(file_list):
105 """Remove temp files, and if possible the temp directory.""" 112 """Remove temp files, and if possible the temp directory."""
106 for f in file_list: 113 for f in file_list:
122 try: 129 try:
123 output = open(temp).readline() 130 output = open(temp).readline()
124 except IOError: 131 except IOError:
125 output = "" 132 output = ""
126 clean_up(fasta_files + temp_files) 133 clean_up(fasta_files + temp_files)
127 sys.exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), 134 sys.exit(
128 error_level) 135 "One or more tasks failed, e.g. %i from %r gave:\n%s"
136 % (error_level, cmd, output),
137 error_level,
138 )
129 del results 139 del results
130 del jobs 140 del jobs
131 141
132 out_handle = open(tabular_file, "w") 142 out_handle = open(tabular_file, "w")
133 out_handle.write("#ID\tlen\tExpAA\tFirst60\tPredHel\tTopology\n") 143 out_handle.write("#ID\tlen\tExpAA\tFirst60\tPredHel\tTopology\n")