annotate lib/utils.py @ 0:1d1b9e1b2e2f draft

Uploaded
author petr-novak
date Thu, 19 Dec 2019 10:24:45 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1 #!/usr/bin/env python3
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
2 import os
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
3 import hashlib
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
4
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
5 from itertools import chain
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
6
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
7
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
8
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
def md5checksum(filename, fail_if_missing=True):
    '''Return the hexadecimal MD5 digest of the file *filename*.

    The file is opened in binary mode and hashed in 4096-byte chunks.

    If the file does not exist, FileNotFoundError is re-raised when
    *fail_if_missing* is true; otherwise a human-readable message naming the
    missing file is returned in place of the digest.
    '''
    digest = hashlib.md5()
    try:
        with open(filename, "rb") as handle:
            while True:
                chunk = handle.read(4096)
                if not chunk:
                    break
                digest.update(chunk)
    except FileNotFoundError:
        if fail_if_missing:
            raise
        return "Not calculated!!!! File {} is missing".format(filename)

    return digest.hexdigest()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
22
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
23
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
class FilePath(str):
    '''
    Extension of str - it only adds an attribute marking that the string is
    also a path to a file.
    '''

    def __new__(cls, string):
        # str is immutable, so the marker has to be attached in __new__.
        instance = super().__new__(cls, string)
        instance.filepath = True
        return instance

    def relative(self, start):
        '''Return this path expressed relative to the directory *start*.'''
        relative_path = os.path.relpath(self, start)
        return relative_path
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
37
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
38
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
def save_as_table(d, path, header=None, relative=True):
    '''Write a list of dictionaries to *path* as a tab-separated table.

    The first line holds the column names and every following line holds one
    dictionary from *d*. String values are wrapped in double quotes; any
    other value is written with str().

    d        -- list of dictionaries, one per output row
    path     -- destination file, overwritten if it already exists
    header   -- column names in output order; when empty or None the columns
                are the union of all keys found in *d*, in first-seen order
    relative -- when true, FilePath values are written relative to the
                directory containing *path*, otherwise unchanged

    A key that is absent from a row yields an empty field rather than a
    KeyError, so rows with differing key sets can share one table.
    '''
    pathdir = os.path.dirname(path)
    if not header:
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # column order is stable instead of following set() hashing.
        header = list(dict.fromkeys(chain.from_iterable(d)))
        print("header: ---------", header)
    columns = list(header)
    with open(path, 'w') as f:
        f.write("\t".join(map(str, columns)))
        f.write("\n")
        for row in d:
            fields = []
            for key in columns:
                if key not in row:
                    # Row lacks this column: keep the cell, leave it empty.
                    fields.append("")
                    continue
                value = row[key]
                if isinstance(value, FilePath):
                    shown = value.relative(pathdir) if relative else value
                    fields.append('"' + str(shown) + '"')
                elif isinstance(value, str):
                    fields.append('"' + value + '"')
                else:
                    fields.append(str(value))
            f.write("\t".join(fields))
            f.write("\n")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
68
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
69
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
def export_tandem_consensus(clusters_info, path, rank=1, n=1):
    '''Export tandem repeat consensus sequences to a FASTA file.

    Every item of *clusters_info* that has a non-empty TR_consensus and whose
    tandem_rank equals *rank* is written to *path* as one record. The record
    name is built from the cluster index, *n* and TR_monomer_length; the
    sequence is the consensus with any '<pre>' marker removed, repeated *n*
    times.

    Returns *path* when at least one record was written, otherwise None.
    The file at *path* is created (or truncated) in both cases.
    '''
    print("exporting fasta files")
    print(clusters_info)
    wrote_record = False
    with open(path, 'w') as fasta:
        for cluster in clusters_info:
            print(cluster)
            print(dir(cluster))
            if not (cluster.TR_consensus and rank == cluster.tandem_rank):
                continue
            record = ">CL{index}_TR_{n}_x_{L}nt\n{sequence}\n".format(
                index=cluster.index,
                n=n,
                L=cluster.TR_monomer_length,
                sequence=n * cluster.TR_consensus.replace('<pre>', ''))
            fasta.write(record)
            wrote_record = True
    return path if wrote_record else None
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
90
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
91
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
def file_len(filename):
    '''Return the number of lines in the text file *filename*.

    An empty file yields 0. A final line without a trailing newline still
    counts as a line.
    '''
    with open(filename) as f:
        # Count lines without keeping them; the previous loop reused the
        # counter name as the loop variable and returned a string.
        return sum(1 for _ in f)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
99
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
def go2line(f, L):
    '''Position file object *f* at the start of line *L* (counted from 1).

    The file is rewound first. L == 0 and L == 1 both leave *f* at the
    beginning. If *L* is larger than the number of lines plus one, or is
    negative, *f* is left at end of file. Nothing is returned.

    Lines are skipped with readline(), so the final position is correct in
    text mode as well, where character counts and byte offsets differ for
    multi-byte encodings and translated newlines.
    '''
    f.seek(0)
    if L == 0:
        return
    current = 1
    # Stop once line L is next, or when readline() reports end of file.
    while current != L and f.readline():
        current += 1
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
114
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
def format_query(x):
    '''
    Render the items of x as a quoted, comma-separated list for use in a
    query, in the format ("x","y","z",...). Each item is converted with
    str(); an empty x gives ("").
    '''
    quoted_items = '","'.join(str(item) for item in x)
    return '("{}")'.format(quoted_items)