0
|
1 __license__ = "MIT"
|
|
2 __version = "0.3"
|
|
3
|
|
4 def splitkeepsep(s, sep): # seems great but adds element to an awkward spot!
|
|
5 """items and keep the separator i.e. a,b,c becomes ['a,', 'b,', 'c']"""
|
|
6 import re
|
|
7
|
|
8 return reduce(lambda acc, elem: acc[:-1] + [acc[-1] + elem] if elem == sep else acc + [elem],
|
|
9 re.split("(%s)" % re.escape(sep), s), [])
|
|
10
|
|
11
|
|
12 def create_logger(logfile):
|
|
13 import logging as logging
|
|
14 # logging.basicConfig(filename=logfile, level=logging.DEBUG)
|
|
15 logger = logging.getLogger()
|
|
16 hdlr = logging.FileHandler(logfile)
|
|
17 formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
|
|
18 hdlr.setFormatter(formatter)
|
|
19 logger.addHandler(hdlr) # technically calling this multiple times will not add additional loggers
|
|
20 logger.setLevel(logging.DEBUG)
|
|
21
|
|
22
|
|
23 def create_permutations(num_branches, num_additions):
|
|
24 """
|
|
25 # create all permutations of placing num_additions monosaccharides on the end on a glycan with num_branches branch points
|
|
26 """
|
|
27 permute = []
|
|
28 import itertools
|
|
29
|
|
30 a = range(0, num_branches)
|
|
31 if num_additions > 12 or num_additions == 1: # unsupported return None
|
|
32 return []
|
|
33 elif num_additions == 2:
|
|
34 for r in itertools.product(a, a):
|
|
35 if len(set(r)) == num_additions:
|
|
36 permute.append((r[0], r[1]))
|
|
37 elif num_additions == 3:
|
|
38 for r in itertools.product(a, a, a):
|
|
39 if len(set(r)) == num_additions:
|
|
40 permute.append((r[0], r[1], r[2]))
|
|
41 elif num_additions == 4:
|
|
42 for r in itertools.product(a, a, a, a):
|
|
43 if len(set(r)) == num_additions:
|
|
44 permute.append((r[0], r[1], r[2], r[3]))
|
|
45 elif num_additions == 5:
|
|
46 for r in itertools.product(a, a, a, a, a):
|
|
47 if len(set(r)) == num_additions:
|
|
48 permute.append((r[0], r[1], r[2], r[3], r[4]))
|
|
49 elif num_additions == 6:
|
|
50 for r in itertools.product(a, a, a, a, a, a):
|
|
51 if len(set(r)) == num_additions:
|
|
52 permute.append((r[0], r[1], r[2], r[3], r[4], r[5]))
|
|
53 elif num_additions == 7:
|
|
54 for r in itertools.product(a, a, a, a, a, a, a):
|
|
55 if len(set(r)) == num_additions:
|
|
56 permute.append((r[0], r[1], r[2], r[3], r[4], r[5], r[6]))
|
|
57 elif num_additions == 8:
|
|
58 for r in itertools.product(a, a, a, a, a, a, a, a):
|
|
59 if len(set(r)) == num_additions:
|
|
60 permute.append((r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7]))
|
|
61 elif num_additions == 9:
|
|
62 for r in itertools.product(a, a, a, a, a, a, a, a, a):
|
|
63 if len(set(r)) == num_additions:
|
|
64 permute.append((r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]))
|
|
65 elif num_additions == 10:
|
|
66 for r in itertools.product(a, a, a, a, a, a, a, a, a, a):
|
|
67 if len(set(r)) == num_additions:
|
|
68 permute.append((r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8], r[9]))
|
|
69 elif num_additions == 11:
|
|
70 for r in itertools.product(a, a, a, a, a, a, a, a, a, a, a):
|
|
71 if len(set(r)) == num_additions:
|
|
72 permute.append((r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8], r[9], r[10]))
|
|
73 elif num_additions == 12:
|
|
74 for r in itertools.product(a, a, a, a, a, a, a, a, a, a, a, a):
|
|
75 if len(set(r)) == num_additions:
|
|
76 permute.append((r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8], r[9], r[10], r[11]))
|
|
77 return permute
|
|
78
|
|
79
|
|
80 def read_linearcode(inputstream, logfile="logging.log", extend=False):
|
|
81 """
|
|
82 read linear code and either append or expand to provide all forms
|
|
83 append mode, appends xylose and comma'd glycans to the root
|
|
84 extend mode create all permutations if the comma'd glycan
|
|
85 more on append: # . read linear code and append comma separated data as a new rooted branch with a xylose tag
|
|
86 # example (A??GN??(A??GN??)Ma3(A??GN??(A??GN??)Ma6)Mb4GNb4(Fa6)GN,F,F,NN,A??GN)
|
|
87 # will become
|
|
88 # (((A??GN??)(F??F??)(NN??)X??)(A??GN??(A??GN??)Ma3(A??GN??(A??GN??)Ma6)Mb4GNb4)(Fa6)GN)
|
|
89 # as all the additional glycans are attached to a xylose at the root position
|
|
90 # but why? analysis tools do not support these unattached glycans and creating all the permutations
|
|
91 # is a large task. Additionally, extend mode produces too many glycans of unknown 'veracity'.
|
|
92 :param inputstream:
|
|
93 :param logfile:
|
|
94 :param extend: boolean flag. default is append (false). True extend and creates all permutes
|
|
95 :return:
|
|
96 """
|
|
97 import logging as logging
|
|
98
|
|
99 create_logger(logfile)
|
|
100 linearcode_list = []
|
|
101 if inputstream is None or inputstream == [] or inputstream == "":
|
|
102 raise IOError("empty input stream")
|
|
103 for line in inputstream:
|
|
104 line = line.strip()
|
|
105 # does this linear code have unassigned glycans on leaves
|
|
106 if "," in line:
|
|
107 # remove brackets over entire linear code
|
|
108
|
|
109 if line[0] == "(" and line[-1] == ")":
|
|
110 lin = line[1:-1]
|
|
111 else:
|
|
112 lin = line
|
|
113 # split by , to get additional glycans
|
|
114 glycan = lin.split(",")
|
|
115 additional_glycans = glycan[1:]
|
|
116 logging.debug('These are the %s additional glycans', additional_glycans)
|
|
117 logging.debug('There are %s additional glycans', len(additional_glycans))
|
|
118
|
|
119 # assume additional glycans of size one need a ?? appended to indicate unknown linkage
|
|
120 for i in range(0, len(additional_glycans)):
|
|
121 additional_glycans[i] += "??"
|
|
122
|
|
123 if extend: # permute
|
|
124 # How many branches
|
|
125 gly = splitkeepsep(glycan[0], "(")
|
|
126 # . Intervention to prevent core fucose glycosylation
|
|
127 if len(gly[-1]) < 7 and gly[-1][0].upper() == "F":
|
|
128 logging.debug('Merging Core Fucose with adjacent branch %s %s %s ', gly[-1], len(gly[-1]),
|
|
129 gly[-1][0].upper()) # initial part + splits
|
|
130 last = gly.pop()
|
|
131 last2 = gly.pop()
|
|
132 gly.append(last2 + last)
|
|
133 logging.debug('There are %s additional leaves', len(gly)) # initial part + splits
|
|
134 logging.debug('These are the %s additional leaves', gly)
|
|
135
|
|
136 # Permutations...
|
|
137 # . Only compute permutations for certain size as not sure how to generate for all sizes
|
|
138 if len(additional_glycans) > len(gly):
|
|
139 # ignoring for now, although this is possible
|
|
140 logging.warning('Glycans> Leaves - Ignored adding permutations')
|
|
141 linearcode_list.append(line)
|
|
142 elif len(additional_glycans) > 12:
|
|
143 logging.warning('Ignored adding permutations, too many additional glys %s', additional_glycans)
|
|
144 linearcode_list.append(line)
|
|
145 elif len(additional_glycans) == 1:
|
|
146 logging.debug('There are %s permutations', len(gly))
|
|
147 for i in range(0, len(gly)):
|
|
148 tempgly = list(gly) # make sure I make a new copy every time
|
|
149 tempgly[i] = additional_glycans[0] + tempgly[i]
|
|
150 linearcode_list.append("".join(tempgly))
|
|
151 logging.debug('This, %s, is one of the permutations', "".join(tempgly))
|
|
152 else:
|
|
153 perms = create_permutations(len(gly), len(additional_glycans))
|
|
154 logging.debug('There are %s permutations', len(perms))
|
|
155 for itm in perms:
|
|
156 tempgly = list(gly) # make sure I make a new copy every time
|
|
157 # .refactor
|
|
158 for idx in range(0, len(additional_glycans)):
|
|
159 tempgly[int(itm[idx])] = additional_glycans[idx] + tempgly[int(itm[idx])]
|
|
160 logging.debug('This, %s, is one of the permutations', "".join(tempgly))
|
|
161 linearcode_list.append("".join(tempgly))
|
|
162
|
|
163 else: # append xylose tag
|
|
164 # create the additional branch and keep things sorted
|
|
165 additional_branch = "("
|
|
166 for subtype in sorted(set(additional_glycans)):
|
|
167 base = "("
|
|
168 for i in range(0, additional_glycans.count(subtype)):
|
|
169 base += subtype
|
|
170 base += ")"
|
|
171 additional_branch += base
|
|
172 additional_branch += "X??)"
|
|
173 logging.debug('These are the %s additional glycans after munging', additional_branch)
|
|
174 # How many branches
|
|
175 gly = splitkeepsep(glycan[0], "(")
|
|
176
|
|
177 # . check if the branch needs brackets
|
|
178 if ")(" not in gly[-2]: # then need to bracket this
|
|
179 gly[-2] = gly[-2][:-1] + ")(" # bracket this
|
|
180 gly[0] = "(" + gly[0]
|
|
181 logging.debug('The glycan with brackets enclosing previous main branch%s', gly)
|
|
182 else:
|
|
183 # .. todo check all others until fails and bracket.
|
|
184 pass
|
|
185
|
|
186 # prepend the additional branch
|
|
187 gly.insert(0, additional_branch)
|
|
188 logging.debug('There are %s additional leaves', len(gly)) # initial part + splits
|
|
189 logging.debug('These are the %s additional leaves', gly)
|
|
190 linearcode_list.append("".join(gly))
|
|
191 else:
|
|
192 if line != "":
|
|
193 linearcode_list.append(line)
|
|
194 return linearcode_list
|
|
195
|
|
196
|
|
197 if __name__ == "__main__":
|
|
198 from optparse import OptionParser
|
|
199
|
|
200 usage = "usage: python %prog [options]\n"
|
|
201 parser = OptionParser(usage=usage)
|
|
202 parser.add_option("-i", action="store", type="string", dest="i", default="input",
|
|
203 help="input glycan linear code ")
|
|
204 parser.add_option("-l", action="store", type="string", dest="l", default="linearcode.log",
|
|
205 help="log file name ")
|
|
206 parser.add_option("-o", action="store", type="string", dest="o", default="output",
|
|
207 help="output file name (linear code) ")
|
|
208 parser.add_option("-e", action="store_true", dest="e", default=False,
|
|
209 help="turn on extender, which creates all possible permutations . Append by default")
|
|
210 (options, args) = parser.parse_args()
|
|
211
|
|
212 try:
|
|
213 instream = file(options.i, 'r')
|
|
214 except Exception as e:
|
|
215 raise IOError(e, "the input file specified does not exist. Use -h flag for help")
|
|
216 print options.e
|
|
217 allpermutes = read_linearcode(instream, options.l, options.e)
|
|
218 try:
|
|
219 out = file(options.o, "w")
|
|
220 except Exception as e:
|
|
221 raise IOError(e, "cannot write to output file. Use -h flag for help")
|
|
222 try:
|
|
223 for item in allpermutes:
|
|
224 out.write(item + "\r\n")
|
|
225 except Exception as e:
|
|
226 raise e
|
|
227 finally:
|
|
228 out.close()
|