0
|
1 __author__ = "Chris Barnett"
|
|
2 __version__ = "0.3"
|
|
3 __license__ = "MIT"
|
|
4
|
|
class id_generator(object):
    """Hand out consecutive integers, starting at ``counterinit``.

    Thin wrapper around ``itertools.count``. The ``next()`` method is kept for
    existing callers; ``__next__`` and ``__iter__`` additionally make the
    object usable with the builtin ``next()`` and in ``for`` loops.
    """

    def __init__(self, counterinit=0):
        """
        :param counterinit: first value that will be handed out. 0 by default.
        """
        import itertools

        self.generator = itertools.count(counterinit)

    def next(self):
        """
        :return: the next integer in the sequence
        """
        # The builtin next() works on Python 2.6+ and Python 3, whereas the
        # iterator's own .next() method only exists on Python 2.
        return next(self.generator)

    # Python 3 spelling of the iterator protocol; same behaviour as next().
    __next__ = next

    def __iter__(self):
        """
        :return: the generator itself, so it can be iterated over directly
        """
        return self
|
|
13
|
|
14
|
|
def read_meta_kcf(inputstream, prefix="GLY", counterinit=0):
    """
    read kcf file (which may contain multiple kcf entries) and rename the ENTRY.
    often the ENTRY name is too long or is linearcode, and kcf files are then
    not recognised properly and/or are ignored in MCAW and other analysis tools.
    duplicates are not checked for. entries are named as <prefix><x> where x
    comes from a counter which by default starts at 0.

    lines that appear before the first ENTRY are dropped. lines that appear
    after a /// terminator but before the next ENTRY stay attached to the
    preceding entry, so nothing inside the numbered region of the file is lost.

    :param inputstream: iterable of lines, e.g. an open kcf file
    :param prefix: the prefix for the entry. GLY by default. keep it short
    :param counterinit: entries are numbered starting at counterinit. 0 by default.
    :return: list of entries, each entry being a list of its lines (renamed
             ENTRY line first). empty list if the input holds no ENTRY line.
    :raises IOError: if inputstream is None, an empty list or an empty string
    """
    import itertools

    if inputstream is None or inputstream == [] or inputstream == "":
        raise IOError("empty input stream")

    counter = itertools.count(counterinit)
    list_of_kcf_paragraphs = []
    kcfpara = None  # the entry currently being collected (None before the first ENTRY)
    stored = False  # True once kcfpara has been added to list_of_kcf_paragraphs

    for line in inputstream:
        if "ENTRY" in line:
            # . previous entry had no /// terminator: keep it instead of
            # . silently overwriting it with the new entry
            if kcfpara is not None and not stored:
                list_of_kcf_paragraphs.append(kcfpara)
            # . could strip and split the line and remake it, but easier to supplant it
            newline = "ENTRY " + str(prefix) + str(next(counter)) + " Glycan\n"
            kcfpara = [newline]
            stored = False
        elif kcfpara is not None:
            # . lines seen before the first ENTRY (including a stray ///) are skipped
            kcfpara.append(line)
            if "///" in line and not stored:
                # . store each entry once only, even if /// is repeated
                list_of_kcf_paragraphs.append(kcfpara)
                stored = True

    # . sometimes the final kcf entry has no ///, so add that entry here
    if kcfpara is not None and not stored:
        list_of_kcf_paragraphs.append(kcfpara)

    # why this list. easier to deal with each glycan as an individual item in the list
    return list_of_kcf_paragraphs
|
|
47
|
|
48
|
|
def flatten_meta_kcf_list(metakcflist):
    """
    glue every line of every kcf entry together, in order, into one string.

    :param metakcflist: a list containing lists of strings (one inner list per kcf entry)
    :return: combined kcfs as a large string for saving to file
    """
    return "".join(
        kcfline
        for kcfentry in metakcflist
        for kcfline in kcfentry
    )
|
|
58
|
|
59
|
|
if __name__ == "__main__":
    # Command line entry point: read a kcf file, rename every ENTRY and write
    # the result to a new kcf file.
    from optparse import OptionParser

    usage = "usage: python %prog [options]\n"
    parser = OptionParser(usage=usage)
    parser.add_option("-i", action="store", type="string", dest="i", default="input",
                      help="input kcf file (input)")
    parser.add_option("-o", action="store", type="string", dest="o", default="output",
                      help="output kcf file (output)")
    parser.add_option("-p", action="store", type="string", dest="p", default="GLY",
                      help="prefix for glycan entry name change")
    parser.add_option("-c", action="store", type="int", dest="c", default=0,
                      help="starting number for counter for glycan entry")
    (options, args) = parser.parse_args()

    # Both file names always have a value because the options carry defaults.
    inputname = options.i
    outputname = options.o

    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the input even when parsing fails.
    with open(inputname, "r") as instream:
        convertedkcf = read_meta_kcf(instream, prefix=options.p, counterinit=options.c)

    # The output file is only opened once the input has been read successfully.
    with open(outputname, "w") as f:
        f.write(flatten_meta_kcf_list(convertedkcf))
|