annotate manipulate/minimal_kcf/minimal_kcf.py @ 1:0a5e0df17054 draft default tip

Uploaded
author chrisb
date Fri, 06 May 2016 08:05:48 -0400
parents 89592faa2875
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
89592faa2875 Uploaded
chrisb
parents:
diff changeset
1 __author__ = "Chris Barnett"
89592faa2875 Uploaded
chrisb
parents:
diff changeset
2 __version__ = "0.3"
89592faa2875 Uploaded
chrisb
parents:
diff changeset
3 __license__ = "MIT"
89592faa2875 Uploaded
chrisb
parents:
diff changeset
4
89592faa2875 Uploaded
chrisb
parents:
diff changeset
5
89592faa2875 Uploaded
chrisb
parents:
diff changeset
def _read_kcf_section(lines, header, fields):
    """Return the section header line followed by the counted lines after it."""
    try:
        count = int(fields[1])
    except (IndexError, ValueError):
        raise ValueError("malformed KCF section header: %r" % header)
    section = [header]
    for _ in range(count):
        row = next(lines, None)
        if row is None:
            raise ValueError(
                "KCF input ended inside the %s section (expected %d lines)" % (fields[0], count))
        section.append(row)
    return section


def read_meta_kcf(inputstream):
    """
    Read a KCF stream (which may contain multiple KCF entries) and only keep the
    ENTRY, NODE and EDGE parts of each entry.

    A line is recognised by its first whitespace-separated token (ENTRY, NODE, EDGE
    or a token starting with "///"), so a keyword that merely occurs inside another
    field is ignored. Kept lines are returned unmodified, line endings included.

    :param inputstream: the kcf file, or any iterable of kcf lines (a list works too)
    :return: a list with one item per entry; each item is the list of lines kept for
        that entry: the ENTRY line, the NODE header and its node lines, the EDGE
        header and its edge lines, and the "///" terminator when the input had one
    :raises IOError: if inputstream is None, an empty list or an empty string
    :raises ValueError: if a NODE/EDGE header comes before any ENTRY line, if its
        count is missing or not an integer, or if the input ends inside a section
    """
    if inputstream is None or inputstream == [] or inputstream == "":
        raise IOError("empty input stream")

    # One shared iterator: the section reader pulls its lines from the position the
    # main loop has reached. iter() also makes plain lists usable, not only files.
    lines = iter(inputstream)
    list_of_kcf_paragraphs = []
    kcfpara = None  # lines of the entry being collected; None while outside an entry

    for line in lines:
        fields = line.split()
        keyword = fields[0] if fields else ""
        if keyword == "ENTRY":
            if kcfpara is not None:
                # The previous entry had no "///" terminator: keep it instead of
                # silently dropping it when the next entry starts.
                list_of_kcf_paragraphs.append(kcfpara)
            kcfpara = [line]
        elif keyword in ("NODE", "EDGE"):
            if kcfpara is None:
                raise ValueError("%s section found before any ENTRY line" % keyword)
            kcfpara.extend(_read_kcf_section(lines, line, fields))
        elif keyword.startswith("///"):
            # A terminator outside an entry (e.g. a repeated "///") is ignored.
            if kcfpara is not None:
                kcfpara.append(line)
                list_of_kcf_paragraphs.append(kcfpara)
                kcfpara = None

    # The final entry of a file often has no "///"; keep it anyway.
    if kcfpara is not None:
        list_of_kcf_paragraphs.append(kcfpara)

    # One list per glycan, so each can be handled as an individual item.
    return list_of_kcf_paragraphs
89592faa2875 Uploaded
chrisb
parents:
diff changeset
41
89592faa2875 Uploaded
chrisb
parents:
diff changeset
42
89592faa2875 Uploaded
chrisb
parents:
diff changeset
def flatten_meta_kcf_list(metakcflist):
    """
    Concatenate every line of every kcf entry into one string.

    The lines are joined with no separator, so they must already carry their own
    line endings.

    :param metakcflist: a list containing lists of strings
    :return: combined kcfs as a large string for saving to file
    """
    import itertools

    # from_iterable walks the outer list lazily; no argument unpacking of the whole
    # list and no intermediate flat list is needed before joining.
    return "".join(itertools.chain.from_iterable(metakcflist))
89592faa2875 Uploaded
chrisb
parents:
diff changeset
52
89592faa2875 Uploaded
chrisb
parents:
diff changeset
53
89592faa2875 Uploaded
chrisb
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: reduce the kcf file given with -i to its
    # ENTRY/NODE/EDGE parts and write the result to the file given with -o.
    from optparse import OptionParser

    usage = "usage: python %prog [options]\n"
    parser = OptionParser(usage=usage)
    parser.add_option("-i", action="store", type="string", dest="i", default="input",
                      help="input kcf file (input)")
    parser.add_option("-o", action="store", type="string", dest="o", default="output",
                      help="output kcf file (output)")
    (options, args) = parser.parse_args()

    # Both options have defaults, so they are always present on `options`.
    inputname = options.i
    outputname = options.o

    # open() instead of the Python 2-only file() builtin; the with-block closes the
    # input even when parsing fails. The output is opened only after parsing has
    # succeeded, so a bad input never leaves a truncated output file behind.
    with open(inputname, "r") as instream:
        convertedkcf = read_meta_kcf(instream)
    with open(outputname, "w") as f:
        f.write(flatten_meta_kcf_list(convertedkcf))