Mercurial > repos > urgi-team > teiso
view TEisotools-1.1.a/commons/core/seq/ClusterConsensusCollection.py @ 15:255c852351c5 draft
Uploaded
author | urgi-team |
---|---|
date | Thu, 21 Jul 2016 07:36:44 -0400 |
parents | feef9a0db09d |
children |
line wrap: on
line source
import re

from commons.core.seq.BioseqDB import BioseqDB


## Record a collection of BioseqDB instances, one per cluster of consensus
#
# The clusters are read from a single sequence file whose headers are
# expected to look like "<prefix><clusterNumber>Mb<memberNumber> <seqHeader> ...".
#
class ClusterConsensusCollection(object):

    # Header layout: optional non-digit prefix, cluster number, "Mb", member
    # number, one whitespace character, then anything. Compiled once at class
    # level; raw string so the regex escapes are not read as string escapes.
    _HEADER_PATTERN = re.compile(r"(\D*)(\d+)Mb\d+\s.*")

    ## Constructor
    #
    # @param clusterFileName string name of file containing the cluster of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        self._lClusterConsensus = []

    ## Equality: same exact type, same file name and same list of clusters
    #
    # @param o object to compare with
    # @return boolean
    #
    def __eq__(self, o):
        if type(o) is type(self):
            return self._clusterFileName == o._clusterFileName and self._lClusterConsensus == o._lClusterConsensus
        return False

    ## Inequality, defined as the negation of __eq__
    #
    # @param o object to compare with
    # @return boolean
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Get the list of clusters built by fillCollection()
    #
    # @return list of BioseqDB, one per cluster (the internal list, not a copy)
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and append one BioseqDB per cluster to the collection
    #
    # Consecutive headers sharing the same cluster name are grouped in the same
    # BioseqDB; a new BioseqDB is started each time the cluster name changes.
    # An input without any header leaves the collection unchanged.
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # The context manager closes the file even if read() raises.
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)

        # Headers are captured once, before any sequence gets renamed below.
        lHeader = iBioseqDBAllCluster.getHeaderList()

        previousClusterName = None
        clusterConsensus = None
        for header in lHeader:
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            if clusterConsensus is None or clusterName != previousClusterName:
                # Cluster boundary: store the finished cluster (if any) and start a new one.
                if clusterConsensus is not None:
                    self._lClusterConsensus.append(clusterConsensus)
                previousClusterName = clusterName
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(clusterName)
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)

        # The last cluster is still open when the loop ends.
        if clusterConsensus is not None:
            self._lClusterConsensus.append(clusterConsensus)

    ## Split a header into its cluster name and its sequence header
    #
    # @param header string, e.g. "Cluster12Mb3 seqName ..."
    # @return tuple (clusterName, seqHeader): clusterName is the prefix followed
    #         by the cluster number, seqHeader is the second space-separated field
    # @note raises AttributeError when the header does not match the expected layout
    #
    def _getClusterNameAndSeqHeader(self, header):
        m = self._HEADER_PATTERN.match(header)
        clusterName = m.group(1) + m.group(2)
        seqHeader = header.split(" ")[1]
        return clusterName, seqHeader

    ## Fetch a sequence from the global bank, rename it and add it to a cluster
    #
    # @param iBioseqDBAllCluster BioseqDB holding every sequence of the cluster file
    # @param firstHeader string full header used to fetch the sequence
    # @param seqHeader string header given to the sequence before it is added
    # @param clusterConsensus BioseqDB of the cluster receiving the sequence
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Get the 1-based rank of the first cluster containing a given sequence
    #
    # @param seqName string sequence header to look for
    # @return integer rank of the cluster (starting at 1), or None if no cluster contains seqName
    #
    def getNumClusterForAConsensus(self, seqName):
        for nbCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
            if seqName in bioseqDB.getHeaderList():
                return nbCluster
        return None

    ## Get the number of sequences stored in a cluster
    #
    # @param numCluster integer 1-based rank of the cluster
    # @return size reported by the BioseqDB of that cluster
    # @note numCluster is not validated: values below 1 follow Python's
    #       negative-index semantics instead of raising
    #
    def getNumConsensusInCluster(self, numCluster):
        return self._lClusterConsensus[numCluster - 1].getSize()