Previous changeset 39:1236e5a49595 (2013-05-13) Next changeset 41:e57682cd6997 (2013-05-30) |
Commit message:
Uploaded |
modified:
SMART/Java/Python/cleanGff.py |
b |
diff -r 1236e5a49595 -r cd852f3e04ab SMART/Java/Python/cleanGff.py --- a/SMART/Java/Python/cleanGff.py Mon May 13 10:22:25 2013 -0400 +++ b/SMART/Java/Python/cleanGff.py Thu May 30 03:03:34 2013 -0400 |
[ |
b'@@ -43,153 +43,158 @@\n count = {}\n \n class ParsedLine(object):\n- def __init__(self, line, cpt):\n- self.line = line\n- self.cpt = cpt\n- self.parse()\n+\tdef __init__(self, line, cpt):\n+\t\tself.line = line\n+\t\tself.cpt = cpt\n+\t\tself.parse()\n \n- def parse(self):\n- self.line = self.line.strip()\n- self.splittedLine = self.line.split(None, 8)\n- if len(self.splittedLine) < 9:\n- raise Exception("Line \'%s\' has less than 9 fields. Exiting..." % (self.line))\n- self.type = self.splittedLine[2]\n- self.parseOptions()\n- self.getId()\n- self.getParents()\n+\tdef parse(self):\n+\t\tself.line = self.line.strip()\n+\t\tself.splittedLine = self.line.split(None, 8)\n+\t\tif len(self.splittedLine) < 9:\n+\t\t\traise Exception("Line \'%s\' has less than 9 fields. Exiting..." % (self.line))\n+\t\tself.type = self.splittedLine[2]\n+\t\tself.parseOptions()\n+\t\tself.getId()\n+\t\tself.getParents()\n \n- def parseOptions(self):\n- self.parsedOptions = {}\n- for option in self.splittedLine[8].split(";"):\n- option = option.strip()\n- if option == "": continue\n- posSpace = option.find(" ")\n- posEqual = option.find("=")\n- if posEqual != -1 and (posEqual < posSpace or posSpace == -1):\n- key, value = option.split("=", 1)\n- elif posSpace != -1:\n- key, value = option.split(None, 1)\n- else:\n- key = "ID"\n- value = option\n- self.parsedOptions[key.strip()] = value.strip(" \\"")\n+\tdef parseOptions(self):\n+\t\tself.parsedOptions = {}\n+\t\tfor option in self.splittedLine[8].split(";"):\n+\t\t\toption = option.strip()\n+\t\t\tif option == "": continue\n+\t\t\tposSpace = option.find(" ")\n+\t\t\tposEqual = option.find("=")\n+\t\t\tif posEqual != -1 and (posEqual < posSpace or posSpace == -1):\n+\t\t\t\tkey, value = option.split("=", 1)\n+\t\t\telif posSpace != -1:\n+\t\t\t\tkey, value = option.split(None, 1)\n+\t\t\telse:\n+\t\t\t\tkey = "ID"\n+\t\t\t\tvalue = option\n+\t\t\tself.parsedOptions[key.strip()] = value.strip(" \\"")\n \n- def getId(self):\n- for key in self.parsedOptions:\n- if key.lower() == "id":\n- self.id = self.parsedOptions[key]\n- return\n- if "Parent" in self.parsedOptions:\n- parent = self.parsedOptions["Parent"].split(",")[0]\n- if parent not in count:\n- count[parent] = {}\n- if self.type not in count[parent]:\n- count[parent][self.type] = 0\n- count[parent][self.type] += 1\n- self.id = "%s-%s-%d" % (parent, self.type, count[parent][self.type])\n- else:\n- self.id = "smart%d" % (self.cpt)\n- self.parsedOptions["ID"] = self.id\n+\tdef getId(self):\n+\t\tfor key in self.parsedOptions:\n+\t\t\tif key.lower() == "id":\n+\t\t\t\tself.id = self.parsedOptions[key]\n+\t\t\t\treturn\n+\t\tif "Parent" in self.parsedOptions:\n+\t\t\tparent = self.parsedOptions["Parent"].split(",")[0]\n+\t\t\tif parent not in count:\n+\t\t\t\tcount[parent] = {}\n+\t\t\tif self.type not in count[parent]:\n+\t\t\t\tcount[parent][self.type] = 0\n+\t\t\tcount[parent][self.type] += 1\n+\t\t\tself.id = "%s-%s-%d" % (parent, self.type, count[parent][self.type])\n+\t\telse:\n+\t\t\tself.id = "smart%d" % (self.cpt)\n+\t\tself.parsedOptions["ID"] = self.id\n \n- def getParents(self):\n- for key in self.parsedOptions:\n- if key.lower() in ("parent", "derives_from"):\n- self.parents = self.parsedOptions[key].split(",")\n- return\n- self.parents = None\n+\tdef getParents(self):\n+\t\tfor key in self.parsedOptions:\n+\t\t\tif key.lower() in ("parent", "derives_from"):\n+\t\t\t\tself.parents = self.parsedOptions[key].split(",")\n+\t\t\t\treturn\n+\t\tself.parents = None\n \n- def removeParent(self):\n- for key in self.parsedOptions.keys():\n- if key.lower() in ("parent", "derives_from"):\n- del self.parsedOptions[key]\n+\tdef removeParent(self):\n+\t\tfor key in self.parsedOptions.keys():'..b'inc()\n- progress.done()\n+\tdef sort(self):\n+\t\tprogress = Progress(len(self.lines.keys()), "Sorting file", self.verbosity)\n+\t\tfor line in self.lines.values():\n+\t\t\tparentFound = False\n+\t\t\tif line.parents:\n+\t\t\t\tfor parent in line.parents:\n+\t\t\t\t\tif parent in self.lines:\n+\t\t\t\t\t\tparentFound = True\n+\t\t\t\t\t\tif parent in self.children:\n+\t\t\t\t\t\t\tself.children[parent].append(line)\n+\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\tself.children[parent] = [line]\n+\t\t\tif not parentFound:\n+\t\t\t\tline.removeParent()\n+\t\t\t\tself.parents.append(line)\n+\t\t\tprogress.inc()\n+\t\tprogress.done()\n \n- def write(self):\n- progress = Progress(len(self.parents), "Writing output file", self.verbosity)\n- for line in self.parents:\n- self.writeLine(line)\n- progress.inc()\n- self.outputFile.close()\n- progress.done()\n+\tdef write(self):\n+\t\tprogress = Progress(len(self.parents), "Writing output file", self.verbosity)\n+\t\tfor line in self.parents:\n+\t\t\tself.writeLine(line)\n+\t\t\tprogress.inc()\n+\t\tself.outputFile.close()\n+\t\tprogress.done()\n \n- def writeLine(self, line):\n- self.outputFile.write(line.export())\n- if line.id in self.children:\n- for child in self.children[line.id]:\n- self.writeLine(child)\n+\tdef writeLine(self, line):\n+\t\tself.outputFile.write(line.export())\n+\t\tif line.id in self.children:\n+\t\t\tfor child in self.children[line.id]:\n+\t\t\t\tself.writeLine(child)\n \n- def run(self):\n- self.parse()\n- self.sort()\n- self.write()\n+\tdef run(self):\n+\t\tself.parse()\n+\t\tself.sort()\n+\t\tself.write()\n \n \n if __name__ == "__main__":\n- \n- # parse command line\n- description = "Clean GFF v1.0.3: Clean a GFF file (as given by NCBI) and outputs a GFF3 file. [Category: Other]"\n+\t\n+\t# parse command line\n+\tdescription = "Clean GFF v1.0.3: Clean a GFF file (as given by NCBI) and outputs a GFF3 file. [Category: Other]"\n \n- parser = OptionParser(description = description)\n- parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file name [compulsory] [format: file in GFF format]")\n- parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n- parser.add_option("-t", "--types", dest="types", action="store", default="mRNA,exon", type="string", help="list of comma-separated types that you want to keep [format: string] [default: mRNA,exon]")\n- parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n- (options, args) = parser.parse_args()\n+\tparser = OptionParser(description = description)\n+\tparser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file name [compulsory] [format: file in GFF format]")\n+\tparser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n+\tparser.add_option("-t", "--types", dest="types", action="store", default="mRNA,exon", type="string", help="list of comma-separated types that you want to keep [format: string] [default: mRNA,exon]")\n+\tparser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")\n+\t(options, args) = parser.parse_args()\n \n- cleanGff = CleanGff(options.verbosity)\n- cleanGff.setInputFileName(options.inputFileName)\n- cleanGff.setOutputFileName(options.outputFileName)\n- cleanGff.setAcceptedTypes(options.types.split(","))\n- cleanGff.run()\n+\tcleanGff = CleanGff(options.verbosity)\n+\tcleanGff.setInputFileName(options.inputFileName)\n+\tcleanGff.setOutputFileName(options.outputFileName)\n+\tcleanGff.setAcceptedTypes(options.types.split(","))\n+\tcleanGff.run()\n \n' |