comparison orthologs/hmmsearch.py @ 0:5b9a38ec4a39 draft default tip

First commit of old repositories
author osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
date Tue, 11 Mar 2014 12:19:13 -0700
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5b9a38ec4a39
1 import os
2 import optparse
3 import subprocess
4 from multiprocessing import Pool
5
# Directory that receives the unpacked model files and the search outputs.
results_dir = "./data"
# File inside results_dir into which main() merges every search output.
results = "results.data"
# Suffix hmmsearch() appends to a model path to name its output file.
result_extension = ".out"
# Suffix unpackData() appends to a model name to name its model file.
model_extension = ".hmm"
# Sequence database path; main() overwrites it from the --database option.
database = ""
11
12
def unescape(string):
    """Restore the characters that were escaped as ``__xx__`` tokens.

    Every token listed in the table below is replaced by the literal
    character it stands for, e.g. ``__gt__`` becomes ``>`` and ``__sq__``
    becomes ``'``.

    Args:
        string: Text that may contain escape tokens.

    Returns:
        The text with every known token replaced by its character.
    """
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        "'": '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
        '@': '__at__',
        '\n': '__cn__',
        '\r': '__cr__',
        '\t': '__tc__',
        '#': '__pd__',
    }

    # dict.items() is available on both Python 2 and Python 3, whereas
    # dict.iteritems() raises AttributeError on Python 3.
    for key, value in mapped_chars.items():
        string = string.replace(value, key)

    return string
34
35
def unpackData(models):
    """Split a packed models file into one model file per record.

    Each line of the file at *models* is parsed by ``HMM``; the model text
    is then appended to ``<results_dir>/<name><model_extension>``.

    Args:
        models: Path of the packed models file, one record per line.
    """
    with open(models) as packed:
        for record in packed:
            entry = HMM(record)
            target = os.sep.join((results_dir, entry.name + model_extension))
            # Append mode: records that share a name accumulate in one file.
            with open(target, "a") as out:
                out.write(entry.model)
43
44
class HMM:
    """One packed model record: a name plus the model text.

    The record is a tab-separated line. The first field is the model name;
    the second is the model text in which every newline was written as a
    literal backslash followed by ``n``.
    """

    def __init__(self, string):
        """Parse *string*; raises IndexError when it contains no tab."""
        fields = string.split('\t')
        # Only the first two fields are used; later fields are ignored.
        self.name, self.model = fields[0], self.restoreNewLines(fields[1])

    def restoreNewLines(self, string):
        """Turn each literal backslash-n pair back into a real newline."""
        return '\n'.join(string.split('\\n'))
54
55
def toData(text):
    """Return *text* unchanged.

    main() passes every search output through this function before writing
    it to the merged results file; at present it is a pass-through.

    Args:
        text: Contents of one search output file.

    Returns:
        The same text, unmodified.
    """
    return text
62
63
def hmmsearch(input):
    """Run the ``hmmsearch`` program for one model file and wait for it.

    The program is told (via ``-o``) to write its output to the model path
    plus ``result_extension``; the module-level ``database`` is passed as
    the sequence database argument. The exit status is not inspected.

    Args:
        input: File name of a model located inside ``results_dir``.
    """
    model_path = os.sep.join((results_dir, input))
    command = [
        'hmmsearch',
        "-o", model_path + result_extension,
        model_path,
        database,
    ]
    subprocess.Popen(command).wait()
70
71
def _init_worker(database_path):
    """Pool initializer: publish the database path inside a worker process."""
    global database
    database = database_path


def main():
    """Unpack the models, run hmmsearch on each in parallel, merge outputs.

    Command-line options:
        -i/--hmm       Path of the packed models file (required).
        -d/--database  Path of the sequence database (required).

    Steps: create ``results_dir``, unpack one model file per record into
    it, run ``hmmsearch`` on every model file through a process pool, then
    append every output file to ``<results_dir>/<results>``.

    Raises:
        SystemExit: via ``parser.error`` when a required option is missing.
        OSError: from ``os.mkdir`` when ``results_dir`` already exists.
    """
    global database

    usage = """%prog [options]
options (listed below) default to 'None' if omitted
"""
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-i', '--hmm',
        dest='hmm',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input hmm models.')

    parser.add_option(
        '-d', '--database',
        dest='database',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of sequence database.')

    options, args = parser.parse_args()

    # Both paths are needed below; without this check a missing option
    # reaches unescape() as None and fails with an opaque AttributeError.
    if options.hmm is None:
        parser.error("missing required option: -i/--hmm")
    if options.database is None:
        parser.error("missing required option: -d/--database")

    models = unescape(options.hmm)
    database = unescape(options.database)

    os.mkdir(results_dir)

    unpackData(models)

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # processing order and the merged output reproducible between runs.
    model_files = sorted(
        name for name in os.listdir(results_dir)
        if name.lower().endswith(model_extension)
    )

    # The initializer sets the database global in every worker, so the
    # workers see it even when they are not forked from this process.
    pool = Pool(initializer=_init_worker, initargs=(database,))
    try:
        pool.map(hmmsearch, model_files)
    finally:
        # Release the worker processes whether or not map() succeeded.
        pool.close()
        pool.join()

    result_files = sorted(
        name for name in os.listdir(results_dir)
        if name.lower().endswith(result_extension)
    )
    with open(results_dir + os.sep + results, "a") as combined:
        for name in result_files:
            with open(results_dir + os.sep + name, "r") as single:
                combined.write(toData(single.read()) + "\n")
114
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()