diff manipulate/rename_kcf/rename_kcf.py @ 0:89592faa2875 draft

Uploaded
author chrisb
date Wed, 23 Mar 2016 14:35:56 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/manipulate/rename_kcf/rename_kcf.py	Wed Mar 23 14:35:56 2016 -0400
@@ -0,0 +1,86 @@
+__author__ = "Chris Barnett"
+__version__ = "0.3"
+__license__ = "MIT"
+
+class id_generator(object):
+    """
+    hands out consecutive integer ids, starting at counterinit.
+    thin wrapper around itertools.count that keeps the historical next() method
+    and can also be used as a plain iterator
+    """
+
+    def __init__(self, counterinit=0):
+        """
+        :param counterinit: the first id returned by next(). 0 by default.
+        """
+        import itertools
+
+        self.generator = itertools.count(counterinit)
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        """
+        :return: the next id. each call returns the previous id plus one
+        """
+        # . the builtin next() works on python 2.6+ and python 3, whereas the
+        # . .next() method of the wrapped iterator only exists on python 2
+        return next(self.generator)
+
+    # . python 3 spelling of the iterator protocol
+    __next__ = next
+
+
+def read_meta_kcf(inputstream, prefix="GLY", counterinit=0):
+    """
+    read a kcf file (which may contain multiple kcf entries) and rename every ENTRY.
+    often the ENTRY is too long or is linearcode and kcf files then are not recognised properly
+    and/or are ignored in MCAW and other analysis tools.
+    entries are renamed to <prefix><x> where x comes from a counter. duplicates are not checked for.
+
+    a paragraph starts at a line containing "ENTRY" and collects every following line
+    (including the "///" terminator and anything after it) until the next ENTRY line.
+    lines before the first ENTRY are dropped. a paragraph that has no "///" is still kept,
+    wherever it sits in the file, and no terminator is added to it.
+
+    :param inputstream: the kcf file, or any iterable of lines
+    :param prefix: the prefix for the entry. GLY by default. keep it short
+    :param counterinit: entries are numbered starting at counterinit. 0 by default.
+    :return: a list with one list of lines per kcf entry. empty if no ENTRY line was found
+    :raises IOError: if inputstream is None, an empty list or an empty string
+    """
+    import itertools
+
+    if inputstream is None or inputstream == [] or inputstream == "":
+        raise IOError("empty input stream")
+    counter = itertools.count(counterinit)
+    list_of_kcf_paragraphs = []
+    kcfpara = None
+    for line in inputstream:
+        if "ENTRY" in line:
+            # . could strip and split the line and remake it, but easier to supplant it
+            newline = "ENTRY      " + str(prefix) + str(next(counter)) + "    Glycan\n"
+            kcfpara = [newline]
+            # . register the paragraph as soon as it starts, so that an entry without
+            # . a closing /// is never lost and no entry can be added twice
+            list_of_kcf_paragraphs.append(kcfpara)
+        elif kcfpara is not None:
+            # . every other line, /// included, belongs to the entry that is currently open
+            kcfpara.append(line)
+
+    return list_of_kcf_paragraphs  # why this list. easier to deal with each glycan as an individual item in the list
+
+
+def flatten_meta_kcf_list(metakcflist):
+    """
+    join the lines of every kcf entry, in order, into one string.
+
+    :param metakcflist:  a list containing lists of strings. None items are skipped
+    :return: combined kcfs as a large string for saving to file. empty string for an empty list
+    """
+    import itertools
+
+    # . a None item stands for "no entry was read", so it contributes nothing
+    paragraphs = (kcfpara for kcfpara in metakcflist if kcfpara is not None)
+    return "".join(itertools.chain.from_iterable(paragraphs))
+
+
+if __name__ == "__main__":
+    # . command line entry point: read the kcf file given by -i, rename every ENTRY
+    # . as <prefix><counter> and write the combined result to the file given by -o
+    from optparse import OptionParser
+
+    usage = "usage: python %prog [options]\n"
+    parser = OptionParser(usage=usage)
+    parser.add_option("-i", action="store", type="string", dest="i", default="input",
+                      help="input kcf file (input)")
+    parser.add_option("-o", action="store", type="string", dest="o", default="output",
+                      help="output kcf file (output)")
+    parser.add_option("-p", action="store", type="string", dest="p", default="GLY",
+                      help="prefix for glycan entry name change")
+    parser.add_option("-c", action="store", type="int", dest="c", default=0,
+                      help="starting number for counter for glycan entry")
+    (options, args) = parser.parse_args()
+
+    # . -i and -o always carry a value because both options have a default
+    inputname = options.i
+    outputname = options.o
+
+    # . open() rather than the python 2 only file(). the with block closes the
+    # . input even when parsing fails, and errors propagate with their traceback
+    with open(inputname, 'r') as instream:
+        convertedkcf = read_meta_kcf(instream, prefix=options.p, counterinit=options.c)
+    # . the output file is only opened once the input has been read successfully
+    with open(outputname, "w") as f:
+        f.write(flatten_meta_kcf_list(convertedkcf))