comparison tools/protein_analysis/tmhmm2.py @ 20:a19b3ded8f33 draft

v0.2.11 Job splitting fast-fail; RXLR tools support HMMER2 from BioConda; Capture more version information; misc internal changes
author peterjc
date Thu, 21 Sep 2017 11:35:20 -0400
parents f3ecd80850e2
children 238eae32483c
comparison
equal deleted inserted replaced
19:f3ecd80850e2 20:a19b3ded8f33
38 38
39 Also tmhmm2 can fail without returning an error code, for example if run on a 39 Also tmhmm2 can fail without returning an error code, for example if run on a
40 64 bit machine with only the 32 bit binaries installed. This script will spot 40 64 bit machine with only the 32 bit binaries installed. This script will spot
41 when there is no output from tmhmm2, and raise an error. 41 when there is no output from tmhmm2, and raise an error.
42 """ 42 """
43
44 from __future__ import print_function
45
46 import os
43 import sys 47 import sys
44 import os
45 import tempfile 48 import tempfile
46 from seq_analysis_utils import split_fasta, run_jobs, thread_count 49
50 from seq_analysis_utils import run_jobs, split_fasta, thread_count
47 51
48 FASTA_CHUNK = 500 52 FASTA_CHUNK = 500
53
54 if "-v" in sys.argv or "--version" in sys.argv:
55 sys.exit("TMHMM wrapper version 0.0.16")
49 56
50 if len(sys.argv) != 4: 57 if len(sys.argv) != 4:
51 sys.exit("Require three arguments, number of threads (int), input protein FASTA file & output tabular file") 58 sys.exit("Require three arguments, number of threads (int), input protein FASTA file & output tabular file")
52 59
53 num_threads = thread_count(sys.argv[1], default=4) 60 num_threads = thread_count(sys.argv[1], default=4)
79 assert predhel.startswith("PredHel="), line 86 assert predhel.startswith("PredHel="), line
80 predhel = predhel[8:] 87 predhel = predhel[8:]
81 assert topology.startswith("Topology="), line 88 assert topology.startswith("Topology="), line
82 topology = topology[9:] 89 topology = topology[9:]
83 out_handle.write("%s\t%s\t%s\t%s\t%s\t%s\n" 90 out_handle.write("%s\t%s\t%s\t%s\t%s\t%s\n"
84 % (identifier, length, exp_aa, first60, predhel, topology)) 91 % (identifier, length, exp_aa, first60, predhel, topology))
85 count += 1 92 count += 1
86 return count 93 return count
94
87 95
88 # Note that if the input FASTA file contains no sequences, 96 # Note that if the input FASTA file contains no sequences,
89 # split_fasta returns an empty list (i.e. zero temp files). 97 # split_fasta returns an empty list (i.e. zero temp files).
90 fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK) 98 fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK)
91 temp_files = [f + ".out" for f in fasta_files] 99 temp_files = [f + ".out" for f in fasta_files]
101 try: 109 try:
102 os.rmdir(tmp_dir) 110 os.rmdir(tmp_dir)
103 except Exception: 111 except Exception:
104 pass 112 pass
105 113
114
106 if len(jobs) > 1 and num_threads > 1: 115 if len(jobs) > 1 and num_threads > 1:
107 # A small "info" message for Galaxy to show the user. 116 # A small "info" message for Galaxy to show the user.
108 print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)) 117 print("Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)))
109 results = run_jobs(jobs, num_threads) 118 results = run_jobs(jobs, num_threads)
110 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): 119 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs):
111 error_level = results[cmd] 120 error_level = results[cmd]
112 if error_level: 121 if error_level:
113 try: 122 try: