Mercurial > repos > yufei-luo > s_mart
diff SMART/Java/Python/ncList/NCListCursor.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SMART/Java/Python/ncList/NCListCursor.py Fri Jan 18 04:54:14 2013 -0500 @@ -0,0 +1,325 @@ +#! /usr/bin/env python +# +# Copyright INRA-URGI 2009-2010 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
#
import os
import os.path
import struct
from commons.core.parsing.GffParser import GffParser
from SMART.Java.Python.misc.Progress import Progress


class Data(object):
    """Plain record caching what is known about one entry of the first list.

    Instances are built by NCListCursor.precompute() and read back by
    NCListCursor._updateFromMainListData().
    """

    def __init__(self, hIndex, transcript, firstChildLIndex, lastChildLIndex, start, end):
        self.hIndex = hIndex
        self.transcript = transcript
        self.firstChildLIndex = firstChildLIndex
        self.lastChildLIndex = lastChildLIndex
        self.start = start
        self.end = end


class NCListCursor(object):
    """Movable position inside an NC-list.

    NOTE(review): "NC-list" presumably means nested containment list --
    confirm against the NCList class, which is not visible from this file.

    The position is the L index (``_lIndex``); ``-1`` means the cursor is
    out of the list.  Every other attribute (H index, GFF index, start, end,
    transcript, parent/sibling/child indices) is loaded lazily from the
    NC-list; ``None`` means "not loaded yet", while ``-1`` is the "no such
    element" marker compared against in isOut(), isTop() and hasChildren().
    """

    def __init__(self, cursor = None, ncList = None, lIndex = 0, verbosity = 0):
        """Create a cursor, either as a copy of `cursor` or at `lIndex` in `ncList`.

        When `cursor` is given, `ncList`, `lIndex` and `verbosity` are
        superseded by the values copied from it.
        """
        self._verbosity = verbosity
        self._mainListData = []
        if cursor:
            self.copy(cursor)
        else:
            self._ncList = ncList
            self.setLIndex(lIndex)

    @staticmethod
    def _getLastIndex(firstIndex, nbElements):
        """Return the last index of a run of `nbElements` starting at `firstIndex` (-1 if no run)."""
        if firstIndex == -1:
            return -1
        return firstIndex + nbElements - 1

    def _resetCurrentData(self):
        """Forget everything loaded about the current element (not its parent or siblings)."""
        self._hIndex = None
        self._start = None
        self._end = None
        self._gffIndex = None
        self._transcript = None
        self._firstChildLIndex = None
        self._lastChildLIndex = None

    def setLIndex(self, lIndex):
        """Place the cursor at `lIndex` and invalidate all lazily loaded data."""
        self._lIndex = lIndex
        self._start = None
        self._end = None
        self._hIndex = None
        self._gffIndex = None
        self._parentGffIndex = None
        self._parentLIndex = None
        self._parentHIndex = None
        self._parentStart = None
        self._parentEnd = None
        self._transcript = None
        self._firstSiblingLIndex = None
        self._lastSiblingLIndex = None
        self._firstChildLIndex = None
        self._lastChildLIndex = None
        # Only positions inside the first list have an index into the precomputed data.
        self._mainListIndex = lIndex if lIndex < self._ncList.getSizeFirstList() else None

    def precompute(self):
        """Load the data of every element of the first list into `_mainListData`.

        Entries are appended, so entry i describes L index i as long as this
        method is called once on an empty cache.
        """
        self._mainListIndex = 0
        sizeFirstList = self._ncList.getSizeFirstList()
        progress = Progress(sizeFirstList, "Precomputing data", self._verbosity)
        for i in range(sizeFirstList):
            gffIndex, hIndex, _, start, end = self._ncList.getLLineElements(i)
            transcript = self._ncList.getIntervalFromAdress(gffIndex)
            firstChildLIndex, nbChildren = self._ncList.getHLineElements(hIndex)
            lastChildLIndex = self._getLastIndex(firstChildLIndex, nbChildren)
            self._mainListData.append(Data(hIndex, transcript, firstChildLIndex, lastChildLIndex, start, end))
            progress.inc()
        progress.done()

    def _updateFromMainListData(self):
        """Refresh the current element from the precomputed data, if possible.

        Returns True when the attributes were refreshed, False when no
        precomputed entry is available (nothing precomputed, cursor outside
        the first list, or `_mainListIndex` unset / out of range).
        """
        sizeFirstList = self._ncList.getSizeFirstList()
        if not self._mainListData or self._lIndex >= sizeFirstList:
            return False
        index = self._mainListIndex
        # An unset or out-of-range index used to raise when indexing the
        # cache; report "nothing refreshed" so the lazy loaders take over.
        if index is None or index < 0 or index >= min(sizeFirstList, len(self._mainListData)):
            return False
        data = self._mainListData[index]
        self._hIndex = data.hIndex
        self._transcript = data.transcript
        self._firstChildLIndex = data.firstChildLIndex
        self._lastChildLIndex = data.lastChildLIndex
        self._start = data.start
        self._end = data.end
        return True

    def getLIndex(self):
        """Return the current L index (-1 when the cursor is out)."""
        return self._lIndex

    def _getCurrentData(self):
        """Load GFF index, H index, parent L index, start and end of the current element.

        Raises Exception when the loaded end is -1.
        """
        self._gffIndex, self._hIndex, self._parentLIndex, self._start, self._end = self._ncList.getLLineElements(self._lIndex)
        if self._end == -1:
            raise Exception("Error")

    def _getParentData(self):
        """Load GFF index, H index, start and end of the parent element."""
        if self._parentLIndex is None:
            self._getCurrentData()
        self._parentGffIndex, self._parentHIndex, _, self._parentStart, self._parentEnd = self._ncList.getLLineElements(self._parentLIndex)

    def _getTranscript(self):
        """Load the transcript stored at the current GFF index."""
        if self._gffIndex is None:
            self._getCurrentData()
        self._transcript = self._ncList.getIntervalFromAdress(self._gffIndex)

    def _getSiblingData(self):
        """Load the first and last L indices of the sibling run of the current element."""
        if self._parentHIndex is None:
            self._getParentData()
        if self._parentHIndex == -1:
            # No parent H line: the siblings are the whole first list.
            self._firstSiblingLIndex = 0
            self._lastSiblingLIndex = self._ncList.getSizeFirstList() - 1
        else:
            self._firstSiblingLIndex, nbSiblings = self._ncList.getHLineElements(self._parentHIndex)
            self._lastSiblingLIndex = self._getLastIndex(self._firstSiblingLIndex, nbSiblings)

    def _getChildrenData(self):
        """Load the first and last L indices of the children of the current element."""
        if self._hIndex is None:
            self._getCurrentData()
        self._firstChildLIndex, nbChildren = self._ncList.getHLineElements(self._hIndex)
        self._lastChildLIndex = self._getLastIndex(self._firstChildLIndex, nbChildren)

    def getGffAddress(self):
        """Return the GFF index of the current element, loading it if needed."""
        if self._gffIndex is None:
            self._getCurrentData()
        return self._gffIndex

    def getStart(self):
        """Return the start of the current element, loading it if needed."""
        if self._start is None:
            self._getCurrentData()
        return self._start

    def getEnd(self):
        """Return the end of the current element, loading it if needed."""
        if self._end is None:
            self._getCurrentData()
        return self._end

    def compare(self, cursor):
        """Return True when both cursors have the same L index."""
        return (self._lIndex == cursor._lIndex)

    def getTranscript(self):
        """Return the transcript of the current element, or None when the cursor is out."""
        if self.isOut():
            return None
        if self._transcript is None:
            self._getTranscript()
        return self._transcript

    def isFirst(self):
        """Return True when the current element is the first of its sibling run."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            return (self._lIndex == 0)
        if self._firstSiblingLIndex is None:
            self._getSiblingData()
        return (self._lIndex == self._firstSiblingLIndex)

    def isLast(self):
        """Return True when the current element is the last of its sibling run."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            # Strictly before the last element of the first list: never last.
            return False
        if self._lastSiblingLIndex is None:
            self._getSiblingData()
        return (self._lIndex == self._lastSiblingLIndex)

    def moveUp(self):
        """Move the cursor to the parent of the current element."""
        if self._parentLIndex is None:
            self._getCurrentData()
        self._lIndex = self._parentLIndex
        refreshed = self._updateFromMainListData()
        # Whatever is already known about the parent becomes the current
        # data; unknown (None) values are reloaded lazily on demand.
        self._hIndex = self._parentHIndex
        self._gffIndex = self._parentGffIndex
        self._parentLIndex = None
        self._parentHIndex = None
        self._parentGffIndex = None
        self._transcript = None
        self._firstSiblingLIndex = None
        self._lastSiblingLIndex = None
        if not refreshed:
            # The stored child range still describes the element we just
            # left; drop it so it is reloaded for the parent when needed.
            self._firstChildLIndex = None
            self._lastChildLIndex = None
        self._start = self._parentStart
        self._end = self._parentEnd
        self._parentStart = None
        self._parentEnd = None

    def moveRight(self):
        """Move the cursor to the next L index; does nothing when the cursor is out."""
        if self.isOut():
            return
        if self._lIndex < self._ncList.getSizeFirstList() - 1 and self._mainListIndex is not None:
            self._mainListIndex += 1
            self._updateFromMainListData()
        self._lIndex += 1
        self._resetCurrentData()

    def moveNext(self):
        """Move to the next element, climbing up while the current element is the last sibling.

        The cursor is put out (L index -1) when the last top element is passed.
        """
        while not self.isOut() and self.isLast():
            if self.isTop():
                self._lIndex = -1
                return
            self.moveUp()
        self.moveRight()

    def moveMiddleSibling(self):
        """Move the cursor to the middle of the sibling run of the current element."""
        sizeFirstList = self._ncList.getSizeFirstList()
        if self._lIndex < sizeFirstList - 1:
            # Floor division: the result is used as a list index.
            self._mainListIndex = (sizeFirstList - 1) // 2
            self._updateFromMainListData()
        if self._firstSiblingLIndex is None or self._lastSiblingLIndex is None:
            self._getSiblingData()
        self._lIndex = (self._lastSiblingLIndex + self._firstSiblingLIndex) // 2
        self._resetCurrentData()

    def moveSibling(self, lIndex):
        """Move the cursor to `lIndex`, keeping the known parent and sibling data."""
        if self._lIndex < self._ncList.getSizeFirstList() - 1:
            self._mainListIndex = lIndex
            self._updateFromMainListData()
        self._lIndex = lIndex
        self._resetCurrentData()

    def moveLastSibling(self):
        """Move the cursor to the last element of the sibling run of the current element."""
        sizeFirstList = self._ncList.getSizeFirstList()
        if self._lIndex < sizeFirstList - 1:
            self._mainListIndex = sizeFirstList - 1
            self._updateFromMainListData()
        if self._lastSiblingLIndex is None:
            self._getSiblingData()
        self._lIndex = self._lastSiblingLIndex
        self._resetCurrentData()

    def moveDown(self):
        """Move the cursor to the first child of the current element.

        The cursor ends up out (L index -1) when the element has no child.
        """
        if self._firstChildLIndex is None:
            self._getChildrenData()
        self._parentLIndex = self._lIndex
        self._parentHIndex = self._hIndex
        self._parentGffIndex = self._gffIndex
        self._lIndex = self._firstChildLIndex
        # The children of the element we leave are the siblings of the
        # element we reach; set both ends so no stale value survives.
        self._firstSiblingLIndex = self._firstChildLIndex
        self._lastSiblingLIndex = self._lastChildLIndex
        self._hIndex = None
        self._gffIndex = None
        self._transcript = None
        self._firstChildLIndex = None
        self._lastChildLIndex = None
        self._parentStart = self._start
        self._parentEnd = self._end
        self._start = None
        self._end = None

    def isOut(self):
        """Return True when the cursor is out of the list (L index -1)."""
        return (self._lIndex == -1)

    def isTop(self):
        """Return True when the current element has no parent (parent L index -1)."""
        if self._parentLIndex is None:
            self._getCurrentData()
        return (self._parentLIndex == -1)

    def hasChildren(self):
        """Return True when the current element has at least one child."""
        if self._hIndex is None:
            self._getCurrentData()
        if self._hIndex == -1:
            return False
        if self._firstChildLIndex is None:
            self._getChildrenData()
        return (self._firstChildLIndex != -1)

    def copy(self, cursor):
        """Copy the state of `cursor` into this cursor.

        The NC-list and the precomputed data list are shared by reference,
        not duplicated.
        """
        self._ncList = cursor._ncList
        self._lIndex = cursor._lIndex
        self._hIndex = cursor._hIndex
        self._gffIndex = cursor._gffIndex
        self._parentLIndex = cursor._parentLIndex
        self._parentHIndex = cursor._parentHIndex
        self._parentGffIndex = cursor._parentGffIndex
        self._transcript = cursor._transcript
        self._firstSiblingLIndex = cursor._firstSiblingLIndex
        self._lastSiblingLIndex = cursor._lastSiblingLIndex
        self._firstChildLIndex = cursor._firstChildLIndex
        self._lastChildLIndex = cursor._lastChildLIndex
        self._mainListData = cursor._mainListData
        self._mainListIndex = cursor._mainListIndex
        self._verbosity = cursor._verbosity
        self._parentStart = cursor._parentStart
        self._parentEnd = cursor._parentEnd
        self._start = cursor._start
        self._end = cursor._end

    def __str__(self):
        """Return a one-line description of the cursor state, for debugging."""
        return "NC-list: %s, Lindex: %s, Hindex: %s, GFFindex: %s, start: %s, end: %s, parent Lindex: %s, parent Hindex: %s, parent GFFindex: %s, transcript: %s, last sibling: %s" % (self._ncList, self._lIndex, self._hIndex, self._gffIndex, self._start, self._end, self._parentLIndex, self._parentHIndex, self._parentGffIndex, self._transcript, self._lastSiblingLIndex)