Mercurial > repos > peterjc > tmhmm_and_signalp
comparison tools/protein_analysis/tmhmm2.py @ 21:238eae32483c draft
"Check this is up to date with all 2020 changes (black etc)"
| author | peterjc | 
|---|---|
| date | Thu, 17 Jun 2021 08:21:06 +0000 | 
| parents | a19b3ded8f33 | 
| children | e1996f0f4e85 | 
Comparison legend: equal, deleted, inserted, replaced
| 20:a19b3ded8f33 | 21:238eae32483c | 
|---|---|
| 37 itself (see the SignalP XML file for settings). | 37 itself (see the SignalP XML file for settings). | 
| 38 | 38 | 
| 39 Also tmhmm2 can fail without returning an error code, for example if run on a | 39 Also tmhmm2 can fail without returning an error code, for example if run on a | 
| 40 64 bit machine with only the 32 bit binaries installed. This script will spot | 40 64 bit machine with only the 32 bit binaries installed. This script will spot | 
| 41 when there is no output from tmhmm2, and raise an error. | 41 when there is no output from tmhmm2, and raise an error. | 
| 42 """ | 42 """ # noqa: E501 | 
| 43 | 43 | 
| 44 from __future__ import print_function | 44 from __future__ import print_function | 
| 45 | 45 | 
| 46 import os | 46 import os | 
| 47 import sys | 47 import sys | 
| 53 | 53 | 
| 54 if "-v" in sys.argv or "--version" in sys.argv: | 54 if "-v" in sys.argv or "--version" in sys.argv: | 
| 55 sys.exit("TMHMM wrapper version 0.0.16") | 55 sys.exit("TMHMM wrapper version 0.0.16") | 
| 56 | 56 | 
| 57 if len(sys.argv) != 4: | 57 if len(sys.argv) != 4: | 
| 58 sys.exit("Require three arguments, number of threads (int), input protein FASTA file & output tabular file") | 58 sys.exit( | 
| 59 "Require three arguments, number of threads (int), input protein " | |
| 60 "FASTA file & output tabular file" | |
| 61 ) | |
| 59 | 62 | 
| 60 num_threads = thread_count(sys.argv[1], default=4) | 63 num_threads = thread_count(sys.argv[1], default=4) | 
| 61 fasta_file = sys.argv[2] | 64 fasta_file = sys.argv[2] | 
| 62 tabular_file = sys.argv[3] | 65 tabular_file = sys.argv[3] | 
| 63 | 66 | 
| 85 first60 = first60[8:] | 88 first60 = first60[8:] | 
| 86 assert predhel.startswith("PredHel="), line | 89 assert predhel.startswith("PredHel="), line | 
| 87 predhel = predhel[8:] | 90 predhel = predhel[8:] | 
| 88 assert topology.startswith("Topology="), line | 91 assert topology.startswith("Topology="), line | 
| 89 topology = topology[9:] | 92 topology = topology[9:] | 
| 90 out_handle.write("%s\t%s\t%s\t%s\t%s\t%s\n" | 93 out_handle.write( | 
| 91 % (identifier, length, exp_aa, first60, predhel, topology)) | 94 "%s\t%s\t%s\t%s\t%s\t%s\n" | 
| 95 % (identifier, length, exp_aa, first60, predhel, topology) | |
| 96 ) | |
| 92 count += 1 | 97 count += 1 | 
| 93 return count | 98 return count | 
| 94 | 99 | 
| 95 | 100 | 
| 96 # Note that if the input FASTA file contains no sequences, | 101 # Note that if the input FASTA file contains no sequences, | 
| 97 # split_fasta returns an empty list (i.e. zero temp files). | 102 # split_fasta returns an empty list (i.e. zero temp files). | 
| 98 fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK) | 103 fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK) | 
| 99 temp_files = [f + ".out" for f in fasta_files] | 104 temp_files = [f + ".out" for f in fasta_files] | 
| 100 jobs = ["tmhmm -short %s > %s" % (fasta, temp) | 105 jobs = [ | 
| 101 for fasta, temp in zip(fasta_files, temp_files)] | 106 "tmhmm -short %s > %s" % (fasta, temp) | 
| 107 for fasta, temp in zip(fasta_files, temp_files) | |
| 108 ] | |
| 102 | 109 | 
| 103 | 110 | 
| 104 def clean_up(file_list): | 111 def clean_up(file_list): | 
| 105 """Remove temp files, and if possible the temp directory.""" | 112 """Remove temp files, and if possible the temp directory.""" | 
| 106 for f in file_list: | 113 for f in file_list: | 
| 122 try: | 129 try: | 
| 123 output = open(temp).readline() | 130 output = open(temp).readline() | 
| 124 except IOError: | 131 except IOError: | 
| 125 output = "" | 132 output = "" | 
| 126 clean_up(fasta_files + temp_files) | 133 clean_up(fasta_files + temp_files) | 
| 127 sys.exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), | 134 sys.exit( | 
| 128 error_level) | 135 "One or more tasks failed, e.g. %i from %r gave:\n%s" | 
| 136 % (error_level, cmd, output), | |
| 137 error_level, | |
| 138 ) | |
| 129 del results | 139 del results | 
| 130 del jobs | 140 del jobs | 
| 131 | 141 | 
| 132 out_handle = open(tabular_file, "w") | 142 out_handle = open(tabular_file, "w") | 
| 133 out_handle.write("#ID\tlen\tExpAA\tFirst60\tPredHel\tTopology\n") | 143 out_handle.write("#ID\tlen\tExpAA\tFirst60\tPredHel\tTopology\n") | 
