diff SMART/Java/Python/ncList/NCListCursor.py @ 36:44d5973c188c

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 15:02:29 -0400
parents 769e306b7933
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ncList/NCListCursor.py	Tue Apr 30 15:02:29 2013 -0400
@@ -0,0 +1,325 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os, os.path, struct
+from commons.core.parsing.GffParser import GffParser
+from SMART.Java.Python.misc.Progress import Progress
+
+
+class Data(object):
+    """Plain record holding the values stored for one NC-list element.
+
+    The six constructor arguments are kept, unchanged, as public attributes
+    of the same names: hIndex, transcript, firstChildLIndex, lastChildLIndex,
+    start and end.
+    """
+
+    def __init__(self, hIndex, transcript, firstChildLIndex, lastChildLIndex, start, end):
+        # Identity of the element.
+        self.hIndex, self.transcript = hIndex, transcript
+        # L indexes of its first and last child.
+        self.firstChildLIndex, self.lastChildLIndex = firstChildLIndex, lastChildLIndex
+        # Interval bounds.
+        self.start, self.end = start, end
+
+class NCListCursor(object):
+    """Position inside an NC-list, with lazily loaded data about that position.
+
+    The cursor is identified by its L index (``_lIndex``).  Every other field
+    (start, end, H index, GFF address, parent, siblings, children, transcript)
+    starts as ``None`` and is read from the underlying ``ncList`` object the
+    first time it is needed.
+
+    ``-1`` is used as the "nothing there" marker: an L index of -1 means the
+    cursor is out of the list (see isOut), a parent L index of -1 means the
+    element is at the top (see isTop), and an H index or first child L index
+    of -1 means the element has no child (see hasChildren).
+
+    The ``ncList`` object must provide getSizeFirstList(),
+    getLLineElements(lIndex), getHLineElements(hIndex) and
+    getIntervalFromAdress(gffIndex).
+    """
+
+    def __init__(self, cursor = None, ncList = None, lIndex = 0, verbosity = 0):
+        """Create a cursor.
+
+        If ``cursor`` is given, the new object is a copy of it (see copy) and
+        the other arguments are ignored.  Otherwise the cursor is placed on
+        element ``lIndex`` of ``ncList``.
+        """
+        self._verbosity = verbosity
+        self._mainListData = []
+        if cursor:
+            self.copy(cursor)
+        else:
+            self._ncList = ncList
+            self.setLIndex(lIndex)
+
+    def setLIndex(self, lIndex):
+        """Jump to element ``lIndex`` and forget every cached value."""
+        self._lIndex             = lIndex
+        self._start              = None
+        self._end                = None
+        self._hIndex             = None
+        self._gffIndex           = None
+        self._parentGffIndex     = None
+        self._parentLIndex       = None
+        self._parentHIndex       = None
+        self._parentStart        = None
+        self._parentEnd          = None
+        self._transcript         = None
+        self._firstSiblingLIndex = None
+        self._lastSiblingLIndex  = None
+        self._firstChildLIndex   = None
+        self._lastChildLIndex    = None
+        # Only elements of the first list have an entry in the precomputed data.
+        self._mainListIndex      = lIndex if lIndex < self._ncList.getSizeFirstList() else None
+
+    def precompute(self):
+        """Load a Data record for every element of the first list.
+
+        The records are appended to ``_mainListData``; the main list index is
+        reset to 0.
+        """
+        self._mainListIndex = 0
+        sizeFirstList = self._ncList.getSizeFirstList()
+        progress = Progress(sizeFirstList, "Precomputing data", self._verbosity)
+        for i in range(sizeFirstList):
+            gffIndex, hIndex, parentLIndex, start, end = self._ncList.getLLineElements(i)
+            transcript = self._ncList.getIntervalFromAdress(gffIndex)
+            firstChildLIndex, nbChildren = self._ncList.getHLineElements(hIndex)
+            lastChildLIndex = -1 if firstChildLIndex == -1 else firstChildLIndex + nbChildren-1
+            self._mainListData.append(Data(hIndex, transcript, firstChildLIndex, lastChildLIndex, start, end))
+            progress.inc()
+        progress.done()
+
+    def _updateFromMainListData(self):
+        """Fill the cached fields from the precomputed record, if there is one.
+
+        Returns True when a record was found at ``_mainListIndex`` and copied,
+        False otherwise (nothing precomputed, cursor outside the first list,
+        or no usable main list index).  In the latter case the fields are left
+        untouched and will be loaded lazily.
+        """
+        if not self._mainListData or self._lIndex >= self._ncList.getSizeFirstList():
+            return False
+        index = self._mainListIndex
+        # A missing or out-of-range index is a cache miss, not an error: the
+        # index is None when the cursor was placed on a nested element.
+        if index is None or index < 0 or index >= len(self._mainListData):
+            return False
+        data = self._mainListData[index]
+        self._hIndex           = data.hIndex
+        self._transcript       = data.transcript
+        self._firstChildLIndex = data.firstChildLIndex
+        self._lastChildLIndex  = data.lastChildLIndex
+        self._start            = data.start
+        self._end              = data.end
+        return True
+
+    def getLIndex(self):
+        """Return the L index of the current element (-1 when out)."""
+        return self._lIndex
+
+    def _getCurrentData(self):
+        """Read GFF address, H index, parent L index, start and end of the current element.
+
+        Raises Exception if the end which is read is -1.
+        """
+        self._gffIndex, self._hIndex, self._parentLIndex, self._start, self._end = self._ncList.getLLineElements(self._lIndex)
+        if self._end == -1:
+            raise Exception("Error")
+
+    def _getParentData(self):
+        """Read GFF address, H index, start and end of the parent element."""
+        if self._parentLIndex is None:
+            self._getCurrentData()
+        # The third value (the parent's own parent) is not needed.
+        self._parentGffIndex, self._parentHIndex, _, self._parentStart, self._parentEnd = self._ncList.getLLineElements(self._parentLIndex)
+
+    def _getTranscript(self):
+        """Load the transcript stored at the GFF address of the current element."""
+        if self._gffIndex is None:
+            self._getCurrentData()
+        self._transcript = self._ncList.getIntervalFromAdress(self._gffIndex)
+
+    def _getSiblingData(self):
+        """Compute the L indexes of the first and last siblings of the current element."""
+        if self._parentHIndex is None:
+            self._getParentData()
+        if self._parentHIndex == -1:
+            # No parent sub-list: the siblings are the whole first list.
+            self._firstSiblingLIndex = 0
+            self._lastSiblingLIndex  = self._ncList.getSizeFirstList() - 1
+        else:
+            self._firstSiblingLIndex, nbSiblings = self._ncList.getHLineElements(self._parentHIndex)
+            self._lastSiblingLIndex = -1 if self._firstSiblingLIndex == -1 else self._firstSiblingLIndex + nbSiblings-1
+
+    def _getChildrenData(self):
+        """Compute the L indexes of the first and last children of the current element."""
+        if self._hIndex is None:
+            self._getCurrentData()
+        self._firstChildLIndex, nbChildren = self._ncList.getHLineElements(self._hIndex)
+        self._lastChildLIndex = -1 if self._firstChildLIndex == -1 else self._firstChildLIndex + nbChildren-1
+
+    def getGffAddress(self):
+        """Return the GFF address of the current element."""
+        if self._gffIndex is None:
+            self._getCurrentData()
+        return self._gffIndex
+
+    def getStart(self):
+        """Return the start of the current element."""
+        if self._start is None:
+            self._getCurrentData()
+        return self._start
+
+    def getEnd(self):
+        """Return the end of the current element."""
+        if self._end is None:
+            self._getCurrentData()
+        return self._end
+
+    def compare(self, cursor):
+        """Return True if both cursors have the same L index."""
+        return (self._lIndex == cursor._lIndex)
+
+    def getTranscript(self):
+        """Return the transcript of the current element, or None when the cursor is out."""
+        if self.isOut():
+            return None
+        if self._transcript is None:
+            self._getTranscript()
+        return self._transcript
+
+    def isFirst(self):
+        """Return True if the current element is the first of its siblings."""
+        # Shortcut for the first list (all but its last element): no need to
+        # read the sibling data.
+        if self._lIndex < self._ncList.getSizeFirstList() - 1:
+            return (self._lIndex == 0)
+        if self._firstSiblingLIndex is None:
+            self._getSiblingData()
+        return (self._lIndex == self._firstSiblingLIndex)
+
+    def isLast(self):
+        """Return True if the current element is the last of its siblings."""
+        # An element strictly before the last one of the first list cannot be
+        # the last of its siblings.
+        if self._lIndex < self._ncList.getSizeFirstList() - 1:
+            return False
+        if self._lastSiblingLIndex is None:
+            self._getSiblingData()
+        return (self._lIndex == self._lastSiblingLIndex)
+
+    def moveUp(self):
+        """Move to the parent of the current element.
+
+        What is already known about the parent (H index, GFF address, start,
+        end) becomes the current data; everything else is reset and reloaded
+        on demand.
+        """
+        if self._parentLIndex is None:
+            self._getCurrentData()
+        self._lIndex = self._parentLIndex
+        self._updateFromMainListData()
+        self._hIndex             = self._parentHIndex
+        self._gffIndex           = self._parentGffIndex
+        self._start              = self._parentStart
+        self._end                = self._parentEnd
+        self._parentLIndex       = None
+        self._parentHIndex       = None
+        self._parentGffIndex     = None
+        self._parentStart        = None
+        self._parentEnd          = None
+        self._transcript         = None
+        self._firstSiblingLIndex = None
+        self._lastSiblingLIndex  = None
+        # The children indexes cached so far belong to the element we just
+        # left (or to whatever _mainListIndex points to), not to the parent:
+        # drop them so that they are recomputed for the new position.
+        self._firstChildLIndex   = None
+        self._lastChildLIndex    = None
+
+    def moveRight(self):
+        """Move to the element with the next L index; does nothing when the cursor is out."""
+        if self.isOut():
+            return
+        if self._lIndex < self._ncList.getSizeFirstList() - 1 and self._mainListIndex is not None:
+            self._mainListIndex += 1
+            # NOTE(review): every field set by this call is reset just below,
+            # so the precomputed data is not actually reused here.
+            self._updateFromMainListData()
+        self._lIndex          += 1
+        self._hIndex           = None
+        self._start            = None
+        self._end              = None
+        self._transcript       = None
+        self._gffIndex         = None
+        self._firstChildLIndex = None
+        self._lastChildLIndex  = None
+
+    def moveNext(self):
+        """Move to the right of the closest element (self or ancestor) which is not a last sibling.
+
+        If the top is reached while every visited element is a last sibling,
+        the cursor is put out (L index -1).
+        """
+        while not self.isOut() and self.isLast():
+            if self.isTop():
+                self._lIndex = -1
+                return
+            self.moveUp()
+        self.moveRight()
+
+    def moveMiddleSibling(self):
+        """Move to the sibling located in the middle of the sibling range."""
+        if self._lIndex < self._ncList.getSizeFirstList() - 1:
+            # Floor division: the result is used as a list index.
+            self._mainListIndex = (self._ncList.getSizeFirstList() - 1) // 2
+            self._updateFromMainListData()
+        # Both bounds are needed below, so make sure both are loaded.
+        if self._lastSiblingLIndex is None or self._firstSiblingLIndex is None:
+            self._getSiblingData()
+        self._lIndex           = (self._lastSiblingLIndex + self._firstSiblingLIndex) // 2
+        self._hIndex           = None
+        self._start            = None
+        self._end              = None
+        self._gffIndex         = None
+        self._transcript       = None
+        self._firstChildLIndex = None
+        self._lastChildLIndex  = None
+
+    def moveSibling(self, lIndex):
+        """Move to ``lIndex``, which the caller guarantees to be a sibling of the current element."""
+        if self._lIndex < self._ncList.getSizeFirstList() - 1:
+            self._mainListIndex = lIndex
+            self._updateFromMainListData()
+        self._lIndex           = lIndex
+        self._hIndex           = None
+        self._start            = None
+        self._end              = None
+        self._gffIndex         = None
+        self._transcript       = None
+        self._firstChildLIndex = None
+        self._lastChildLIndex  = None
+
+    def moveLastSibling(self):
+        """Move to the last sibling of the current element."""
+        if self._lIndex < self._ncList.getSizeFirstList() - 1:
+            self._mainListIndex = self._ncList.getSizeFirstList() - 1
+            self._updateFromMainListData()
+        if self._lastSiblingLIndex is None:
+            self._getSiblingData()
+        self._lIndex           = self._lastSiblingLIndex
+        self._hIndex           = None
+        self._start            = None
+        self._end              = None
+        self._gffIndex         = None
+        self._transcript       = None
+        self._firstChildLIndex = None
+        self._lastChildLIndex  = None
+
+    def moveDown(self):
+        """Move to the first child of the current element.
+
+        The current element becomes the parent, and its children range
+        becomes the sibling range.  If the first child L index is -1, the
+        cursor ends up out; callers are expected to check hasChildren() first.
+        """
+        if self._firstChildLIndex is None:
+            self._getChildrenData()
+        self._parentLIndex       = self._lIndex
+        self._parentHIndex       = self._hIndex
+        self._parentGffIndex     = self._gffIndex
+        self._parentStart        = self._start
+        self._parentEnd          = self._end
+        self._lIndex             = self._firstChildLIndex
+        # The children of the old position are the siblings of the new one;
+        # set both bounds so that no value of the upper level is left behind.
+        self._firstSiblingLIndex = self._firstChildLIndex
+        self._lastSiblingLIndex  = self._lastChildLIndex
+        self._hIndex             = None
+        self._gffIndex           = None
+        self._transcript         = None
+        self._firstChildLIndex   = None
+        self._lastChildLIndex    = None
+        self._start              = None
+        self._end                = None
+
+    def isOut(self):
+        """Return True if the cursor does not point to any element (L index -1)."""
+        return (self._lIndex == -1)
+
+    def isTop(self):
+        """Return True if the current element has no parent (parent L index -1)."""
+        if self._parentLIndex is None:
+            self._getCurrentData()
+        return (self._parentLIndex == -1)
+
+    def hasChildren(self):
+        """Return True if the current element has at least one child."""
+        if self._hIndex is None:
+            self._getCurrentData()
+        if self._hIndex == -1:
+            return False
+        if self._firstChildLIndex is None:
+            self._getChildrenData()
+        return (self._firstChildLIndex != -1)
+
+    def copy(self, cursor):
+        """Copy every field of ``cursor`` into this object.
+
+        The NC-list and the precomputed data list are shared with ``cursor``,
+        not duplicated.
+        """
+        self._ncList             = cursor._ncList
+        self._lIndex             = cursor._lIndex
+        self._hIndex             = cursor._hIndex
+        self._gffIndex           = cursor._gffIndex
+        self._parentLIndex       = cursor._parentLIndex
+        self._parentHIndex       = cursor._parentHIndex
+        self._parentGffIndex     = cursor._parentGffIndex
+        self._transcript         = cursor._transcript
+        self._firstSiblingLIndex = cursor._firstSiblingLIndex
+        self._lastSiblingLIndex  = cursor._lastSiblingLIndex
+        self._firstChildLIndex   = cursor._firstChildLIndex
+        self._lastChildLIndex    = cursor._lastChildLIndex
+        self._mainListData       = cursor._mainListData
+        self._mainListIndex      = cursor._mainListIndex
+        self._verbosity          = cursor._verbosity
+        self._parentStart        = cursor._parentStart
+        self._parentEnd          = cursor._parentEnd
+        self._start              = cursor._start
+        self._end                = cursor._end
+
+    def __str__(self):
+        """Return a one-line dump of the cached fields (unloaded ones show as None)."""
+        return "NC-list: %s, Lindex: %s, Hindex: %s, GFFindex: %s, start: %s, end: %s, parent Lindex: %s, parent Hindex: %s, parent GFFindex: %s, transcript: %s, last sibling: %s" % (self._ncList, self._lIndex, self._hIndex, self._gffIndex, self._start, self._end, self._parentLIndex, self._parentHIndex, self._parentGffIndex, self._transcript, self._lastSiblingLIndex)