Mercurial > repos > chrisb > gap_all_glycan_tools
view manipulate/rename_kcf/rename_kcf.py @ 0:89592faa2875 draft
Uploaded
author | chrisb |
---|---|
date | Wed, 23 Mar 2016 14:35:56 -0400 |
parents | |
children |
line wrap: on
line source
__author__ = "Chris Barnett"
__version__ = "0.3"
__license__ = "MIT"

import itertools


class id_generator(object):
    """Hand out consecutive integers, starting at ``counterinit``."""

    def __init__(self, counterinit=0):
        self.generator = itertools.count(counterinit)

    def next(self):
        """Return the next integer of the sequence."""
        # The builtin next() works on Python 2.6+ and Python 3, whereas the
        # iterator's own .next() method only exists on Python 2.
        return next(self.generator)

    # Let the object also be used with the builtin next() / in for-loops.
    __next__ = next

    def __iter__(self):
        return self


def read_meta_kcf(inputstream, prefix="GLY", counterinit=0):
    """
    Read a kcf file (which may contain multiple kcf entries) and rename
    every ENTRY.

    Often the ENTRY name is too long, or is linearcode, and kcf files are
    then not recognised properly and/or are ignored in MCAW and other
    analysis tools. Entries are renamed to <prefix><x>, where x comes from a
    counter that starts at counterinit. Duplicates are not checked for.

    Lines seen before the first ENTRY are discarded. Lines that follow a
    "///" terminator but precede the next ENTRY stay attached to the entry
    that was just closed. An entry that is never terminated by "///"
    (whether it is the last one or is followed directly by another ENTRY)
    is still returned; no terminator is synthesised for it.

    :param inputstream: the kcf file (any iterable of lines)
    :param prefix: the prefix for the entry. GLY by default. keep it short
    :param counterinit: entries are numbered starting at counterinit.
        0 by default.
    :return: a list with one item per glycan; each item is the list of
        lines of that entry. Empty if the input holds no ENTRY line.
    :raises IOError: if inputstream is None, an empty list or an empty
        string.
    """
    if inputstream is None or inputstream == [] or inputstream == "":
        raise IOError("empty input stream")

    counter = id_generator(counterinit)
    paragraphs = []
    current = None   # entry that currently receives lines
    stored = True    # True once `current` has been put into `paragraphs`

    for line in inputstream:
        if "ENTRY" in line:
            # A new entry starts. If the previous one never saw its "///",
            # keep it instead of silently losing it.
            if current is not None and not stored:
                paragraphs.append(current)
            # Could strip and split the line and remake it, but it is
            # easier to supplant it.
            # NOTE(review): confirm the column padding of this header
            # against what downstream KCF consumers expect.
            header = "ENTRY " + str(prefix) + str(counter.next()) + " Glycan\n"
            current = [header]
            stored = False
        elif "///" in line:
            if current is None:
                # Terminator before any ENTRY: there is nothing to close.
                continue
            current.append(line)
            if not stored:
                # Store each entry exactly once, even if "///" repeats.
                paragraphs.append(current)
                stored = True
        elif current is not None:
            current.append(line)

    # Sometimes a kcf has no "///", or the final kcf of many has none, so
    # add the pending entry.
    if current is not None and not stored:
        paragraphs.append(current)

    # Why a list of lists: it is easier to deal with each glycan as an
    # individual item in the list.
    return paragraphs
def flatten_meta_kcf_list(metakcflist):
    """
    Join per-glycan line lists into one string.

    :param metakcflist: a list containing lists of strings; each inner list
        holds the lines of one glycan. Items that are None or empty are
        skipped.
    :return: combined kcfs as a large string for saving to file
    """
    import itertools

    # from_iterable avoids unpacking the outer list into call arguments, and
    # join consumes the iterator directly, so no intermediate list is built.
    paragraphs = (para for para in metakcflist if para)
    return "".join(itertools.chain.from_iterable(paragraphs))


def _main():
    """Command-line entry point: rename the ENTRY lines of a kcf file."""
    from optparse import OptionParser

    usage = "usage: python %prog [options]\n"
    parser = OptionParser(usage=usage)
    parser.add_option("-i", action="store", type="string", dest="i", default="input",
                      help="input kcf file (input)")
    parser.add_option("-o", action="store", type="string", dest="o", default="output",
                      help="output kcf file (output)")
    parser.add_option("-p", action="store", type="string", dest="p", default="GLY",
                      help="prefix for glycan entry name change")
    parser.add_option("-c", action="store", type="int", dest="c", default=0,
                      help="starting number for counter for glycan entry")
    (options, args) = parser.parse_args()

    # open() replaces the Python 2-only file() builtin; the with-block makes
    # sure the input handle is closed even if parsing fails.
    with open(options.i, "r") as instream:
        convertedkcf = read_meta_kcf(instream, prefix=options.p, counterinit=options.c)

    # The output file is only created once the input was read successfully.
    with open(options.o, "w") as f:
        f.write(flatten_meta_kcf_list(convertedkcf))


if __name__ == "__main__":
    _main()