comparison tools/protein_analysis/rxlr_motifs.py @ 20:a19b3ded8f33 draft

v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
author peterjc
date Thu, 21 Sep 2017 11:35:20 -0400
parents f3ecd80850e2
children 238eae32483c
comparison
equal deleted inserted replaced
19:f3ecd80850e2 20:a19b3ded8f33
29 the predicted cleavage site, as this is expected to be more accurate. 29 the predicted cleavage site, as this is expected to be more accurate.
30 Also note that the HMM score values have changed from v2.0 to v3.0. 30 Also note that the HMM score values have changed from v2.0 to v3.0.
31 Whisson et al. (2007) used SignalP v3.0 anyway. 31 Whisson et al. (2007) used SignalP v3.0 anyway.
32 32
33 Whisson et al. (2007) used HMMER 2.3.2, and although their HMM model 33 Whisson et al. (2007) used HMMER 2.3.2, and although their HMM model
34 can still be used with hmmsearch from HMMER 3 this this does give 34 can still be used with hmmsearch from HMMER 3, sadly this does give
35 slightly different results. We expect the hmmsearch from HMMER 2.3.2 35 slightly different results. We expect the hmmsearch from HMMER 2.3.2
36 (the last stable release of HMMER 2) to be present on the path under 36 (the last stable release of HMMER 2) to be present on the path under
37 the name hmmsearch2 (allowing it to co-exist with HMMER 3). 37 the name hmmsearch2 (allowing it to co-exist with HMMER 3).
38
39 If using Conda, you should therefore install the special "hmmer2"
40 package from BioConda which provides "hmmsearch2" etc::
41
42 conda install -c bioconda hmmer2
43
44 See https://bioconda.github.io/recipes/hmmer2/README.html and
45 https://anaconda.org/bioconda/hmmer2
38 """ 46 """
47
48 from __future__ import print_function
49
39 import os 50 import os
40 import sys
41 import re 51 import re
42 import subprocess 52 import subprocess
53 import sys
54
43 from seq_analysis_utils import fasta_iterator 55 from seq_analysis_utils import fasta_iterator
44 56
45 if "-v" in sys.argv: 57 if "-v" in sys.argv:
46 print("RXLR Motifs v0.0.10") 58 print("RXLR Motifs v0.0.14")
47 sys.exit(0) 59 sys.exit(0)
48 60
49 if len(sys.argv) != 5: 61 if len(sys.argv) != 5:
50 sys.exit("Requires four arguments: protein FASTA filename, threads, model, and output filename") 62 sys.exit("Requires four arguments: protein FASTA filename, threads, model, and output filename")
51 63
89 sys.exit("Did not recognise the model name %r\n" 101 sys.exit("Did not recognise the model name %r\n"
90 "Use Bhattacharjee2006, Win2007, or Whisson2007" % model) 102 "Use Bhattacharjee2006, Win2007, or Whisson2007" % model)
91 103
92 104
93 def get_hmmer_version(exe, required=None): 105 def get_hmmer_version(exe, required=None):
94 cmd = "%s -h" % exe
95 try: 106 try:
96 child = subprocess.Popen([exe, "-h"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) 107 child = subprocess.Popen([exe, "-h"],
108 universal_newlines=True,
109 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
97 except OSError: 110 except OSError:
98 raise ValueError("Could not run %s" % exe) 111 raise ValueError("Could not run %s" % exe)
99 stdout, stderr = child.communicate() 112 stdout, stderr = child.communicate()
100 if required: 113 if required:
101 return required in stdout 114 return required in stdout
108 121
109 122
110 # Run hmmsearch for Whisson et al. (2007) 123 # Run hmmsearch for Whisson et al. (2007)
111 if model == "Whisson2007": 124 if model == "Whisson2007":
112 hmm_file = os.path.join(os.path.split(sys.argv[0])[0], 125 hmm_file = os.path.join(os.path.split(sys.argv[0])[0],
113 "whisson_et_al_rxlr_eer_cropped.hmm") 126 "whisson_et_al_rxlr_eer_cropped.hmm")
114 if not os.path.isfile(hmm_file): 127 if not os.path.isfile(hmm_file):
115 sys.exit("Missing HMM file for Whisson et al. (2007)") 128 sys.exit("Missing HMM file for Whisson et al. (2007)")
116 if not get_hmmer_version(hmmer_search, "HMMER 2.3.2 (Oct 2003)"): 129 if not get_hmmer_version(hmmer_search, "HMMER 2.3.2 (Oct 2003)"):
117 sys.exit("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_search) 130 sys.exit("Missing HMMER 2.3.2 (Oct 2003) binary, %s" % hmmer_search)
118 131
273 # Cleanup 286 # Cleanup
274 os.remove(signalp_input_file) 287 os.remove(signalp_input_file)
275 os.remove(signalp_output_file) 288 os.remove(signalp_output_file)
276 289
277 # Short summary to stdout for Galaxy's info display 290 # Short summary to stdout for Galaxy's info display
278 print "%s for %i sequences:" % (model, total) 291 print("%s for %i sequences:" % (model, total))
279 print ", ".join("%s = %i" % kv for kv in sorted(tally.iteritems())) 292 print(", ".join("%s = %i" % kv for kv in sorted(tally.iteritems())))