Repository 's_mart'
hg clone https://toolshed.g2.bx.psu.edu/repos/yufei-luo/s_mart

Changeset 40:cd852f3e04ab (2013-05-30)
Previous changeset 39:1236e5a49595 (2013-05-13) Next changeset 41:e57682cd6997 (2013-05-30)
Commit message:
Uploaded
modified:
SMART/Java/Python/cleanGff.py
b
diff -r 1236e5a49595 -r cd852f3e04ab SMART/Java/Python/cleanGff.py
--- a/SMART/Java/Python/cleanGff.py Mon May 13 10:22:25 2013 -0400
+++ b/SMART/Java/Python/cleanGff.py Thu May 30 03:03:34 2013 -0400
[
b'@@ -43,153 +43,158 @@\n count = {}\n \n class ParsedLine(object):\n-    def __init__(self, line, cpt):\n-        self.line = line\n-        self.cpt  = cpt\n-        self.parse()\n+\tdef __init__(self, line, cpt):\n+\t\tself.line = line\n+\t\tself.cpt  = cpt\n+\t\tself.parse()\n \n-    def parse(self):\n-        self.line = self.line.strip()\n-        self.splittedLine = self.line.split(None, 8)\n-        if len(self.splittedLine) < 9:\n-            raise Exception("Line \'%s\' has less than 9 fields.  Exiting..." % (self.line))\n-        self.type = self.splittedLine[2]\n-        self.parseOptions()\n-        self.getId()\n-        self.getParents()\n+\tdef parse(self):\n+\t\tself.line = self.line.strip()\n+\t\tself.splittedLine = self.line.split(None, 8)\n+\t\tif len(self.splittedLine) < 9:\n+\t\t\traise Exception("Line \'%s\' has less than 9 fields.  Exiting..." % (self.line))\n+\t\tself.type = self.splittedLine[2]\n+\t\tself.parseOptions()\n+\t\tself.getId()\n+\t\tself.getParents()\n \n-    def parseOptions(self):\n-        self.parsedOptions = {}\n-        for option in self.splittedLine[8].split(";"):\n-            option = option.strip()\n-            if option == "": continue\n-            posSpace = option.find(" ")\n-            posEqual = option.find("=")\n-            if posEqual != -1 and (posEqual < posSpace or posSpace == -1):\n-                key, value = option.split("=", 1)\n-            elif posSpace != -1:\n-                key, value = option.split(None, 1)\n-            else:\n-                key   = "ID"\n-                value = option\n-            self.parsedOptions[key.strip()] = value.strip(" \\"")\n+\tdef parseOptions(self):\n+\t\tself.parsedOptions = {}\n+\t\tfor option in self.splittedLine[8].split(";"):\n+\t\t\toption = option.strip()\n+\t\t\tif option == "": continue\n+\t\t\tposSpace = option.find(" ")\n+\t\t\tposEqual = option.find("=")\n+\t\t\tif posEqual != -1 and (posEqual < posSpace or posSpace == -1):\n+\t\t\t\tkey, value = option.split("=", 1)\n+\t\t\telif posSpace != -1:\n+\t\t\t\tkey, value = option.split(None, 1)\n+\t\t\telse:\n+\t\t\t\tkey   = "ID"\n+\t\t\t\tvalue = option\n+\t\t\tself.parsedOptions[key.strip()] = value.strip(" \\"")\n \n-    def getId(self):\n-        for key in self.parsedOptions:\n-            if key.lower() == "id":\n-                self.id = self.parsedOptions[key]\n-                return\n-        if "Parent" in self.parsedOptions:\n-            parent = self.parsedOptions["Parent"].split(",")[0]\n-            if parent not in count:\n-                count[parent] = {}\n-            if self.type not in count[parent]:\n-                count[parent][self.type] = 0\n-            count[parent][self.type] += 1\n-            self.id = "%s-%s-%d" % (parent, self.type, count[parent][self.type])\n-        else:\n-            self.id = "smart%d" % (self.cpt)\n-        self.parsedOptions["ID"] = self.id\n+\tdef getId(self):\n+\t\tfor key in self.parsedOptions:\n+\t\t\tif key.lower() == "id":\n+\t\t\t\tself.id = self.parsedOptions[key]\n+\t\t\t\treturn\n+\t\tif "Parent" in self.parsedOptions:\n+\t\t\tparent = self.parsedOptions["Parent"].split(",")[0]\n+\t\t\tif parent not in count:\n+\t\t\t\tcount[parent] = {}\n+\t\t\tif self.type not in count[parent]:\n+\t\t\t\tcount[parent][self.type] = 0\n+\t\t\tcount[parent][self.type] += 1\n+\t\t\tself.id = "%s-%s-%d" % (parent, self.type, count[parent][self.type])\n+\t\telse:\n+\t\t\tself.id = "smart%d" % (self.cpt)\n+\t\tself.parsedOptions["ID"] = self.id\n \n-    def getParents(self):\n-        for key in self.parsedOptions:\n-            if key.lower() in ("parent", "derives_from"):\n-                self.parents = self.parsedOptions[key].split(",")\n-                return\n-        self.parents = None\n+\tdef getParents(self):\n+\t\tfor key in self.parsedOptions:\n+\t\t\tif key.lower() in ("parent", "derives_from"):\n+\t\t\t\tself.parents = self.parsedOptions[key].split(",")\n+\t\t\t\treturn\n+\t\tself.parents = None\n \n-    def removeParent(self):\n-        for key in self.parsedOptions.keys():\n-            if key.lower() in ("parent", "derives_from"):\n-                del self.parsedOptions[key]\n+\tdef removeParent(self):\n+\t\tfor key in self.parsedOptions.keys():'..b'inc()\n-        progress.done()\n+\tdef sort(self):\n+\t\tprogress = Progress(len(self.lines.keys()), "Sorting file", self.verbosity)\n+\t\tfor line in self.lines.values():\n+\t\t\tparentFound = False\n+\t\t\tif line.parents:\n+\t\t\t\tfor parent in line.parents:\n+\t\t\t\t\tif parent in self.lines:\n+\t\t\t\t\t\tparentFound = True\n+\t\t\t\t\t\tif parent in self.children:\n+\t\t\t\t\t\t\tself.children[parent].append(line)\n+\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\tself.children[parent] = [line]\n+\t\t\tif not parentFound:\n+\t\t\t\tline.removeParent()\n+\t\t\t\tself.parents.append(line)\n+\t\t\tprogress.inc()\n+\t\tprogress.done()\n \n-    def write(self):\n-        progress = Progress(len(self.parents), "Writing output file", self.verbosity)\n-        for line in self.parents:\n-            self.writeLine(line)\n-            progress.inc()\n-        self.outputFile.close()\n-        progress.done()\n+\tdef write(self):\n+\t\tprogress = Progress(len(self.parents), "Writing output file", self.verbosity)\n+\t\tfor line in self.parents:\n+\t\t\tself.writeLine(line)\n+\t\t\tprogress.inc()\n+\t\tself.outputFile.close()\n+\t\tprogress.done()\n \n-    def writeLine(self, line):\n-        self.outputFile.write(line.export())\n-        if line.id in self.children:\n-            for child in self.children[line.id]:\n-                self.writeLine(child)\n+\tdef writeLine(self, line):\n+\t\tself.outputFile.write(line.export())\n+\t\tif line.id in self.children:\n+\t\t\tfor child in self.children[line.id]:\n+\t\t\t\tself.writeLine(child)\n \n-    def run(self):\n-        self.parse()\n-        self.sort()\n-        self.write()\n+\tdef run(self):\n+\t\tself.parse()\n+\t\tself.sort()\n+\t\tself.write()\n \n \n if __name__ == "__main__":\n-    \n-    # parse command line\n-    description = "Clean GFF v1.0.3: Clean a GFF file (as given by NCBI) and outputs a GFF3 file. [Category: Other]"\n+\t\n+\t# parse command line\n+\tdescription = "Clean GFF v1.0.3: Clean a GFF file (as given by NCBI) and outputs a GFF3 file. [Category: Other]"\n \n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                      type="string", help="input file name [compulsory] [format: file in GFF format]")\n-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                      type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n-    parser.add_option("-t", "--types",     dest="types",          action="store", default="mRNA,exon", type="string", help="list of comma-separated types that you want to keep [format: string] [default: mRNA,exon]")\n-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,           type="int",    help="trace level [format: int]")\n-    (options, args) = parser.parse_args()\n+\tparser = OptionParser(description = description)\n+\tparser.add_option("-i", "--input",     dest="inputFileName",  action="store",                      type="string", help="input file name [compulsory] [format: file in GFF format]")\n+\tparser.add_option("-o", "--output",    dest="outputFileName", action="store",                      type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n+\tparser.add_option("-t", "--types",     dest="types",          action="store", default="mRNA,exon", type="string", help="list of comma-separated types that you want to keep [format: string] [default: mRNA,exon]")\n+\tparser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,           type="int",    help="trace level [format: int]")\n+\t(options, args) = parser.parse_args()\n \n-    cleanGff = CleanGff(options.verbosity)\n-    cleanGff.setInputFileName(options.inputFileName)\n-    cleanGff.setOutputFileName(options.outputFileName)\n-    cleanGff.setAcceptedTypes(options.types.split(","))\n-    cleanGff.run()\n+\tcleanGff = CleanGff(options.verbosity)\n+\tcleanGff.setInputFileName(options.inputFileName)\n+\tcleanGff.setOutputFileName(options.outputFileName)\n+\tcleanGff.setAcceptedTypes(options.types.split(","))\n+\tcleanGff.run()\n \n'