Mercurial > repos > urgi-team > teiso
diff TEisotools-1.1.a/commons/core/seq/ClusterConsensusCollection.py @ 13:feef9a0db09d draft
Uploaded
author | urgi-team |
---|---|
date | Wed, 20 Jul 2016 09:04:42 -0400 |
parents | |
children |
line wrap: on
line diff
import re
from commons.core.seq.BioseqDB import BioseqDB


## Record a collection of BioseqDB instances, one per cluster of consensus
#
# The collection is empty after construction; call fillCollection() to load
# it from the cluster file.
#
class ClusterConsensusCollection(object):

    ## Constructor
    #
    # @param clusterFileName string name of file containing the cluster of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        self._lClusterConsensus = []

    ## Equality: same exact type, same file name and same list of clusters
    #
    # @param o object to compare with
    # @return boolean
    #
    def __eq__(self, o):
        if type(o) is type(self):
            return self._clusterFileName == o._clusterFileName and self._lClusterConsensus == o._lClusterConsensus
        return False

    ## Inequality, defined as the negation of __eq__
    #
    # @param o object to compare with
    # @return boolean
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Get the list of clusters
    #
    # @return list of BioseqDB instances, in the order they were appended by fillCollection()
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and append one BioseqDB per cluster to the collection
    #
    # Headers are processed in the order returned by getHeaderList(). A new
    # cluster is started each time the cluster name differs from the one of
    # the previous header, so only consecutive headers sharing a cluster name
    # end up in the same BioseqDB. An input without any header leaves the
    # collection unchanged.
    #
    # @note ValueError is raised if a header does not have the expected format
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # 'with' guarantees the file is closed even if read() raises
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)

        previousClusterName = None
        clusterConsensus = None
        for header in iBioseqDBAllCluster.getHeaderList():
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            if clusterConsensus is None or clusterName != previousClusterName:
                # Cluster boundary: store the finished cluster (if any) and start a new one
                if clusterConsensus is not None:
                    self._lClusterConsensus.append(clusterConsensus)
                previousClusterName = clusterName
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(previousClusterName)
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)

        # The last cluster is still pending once the loop is over
        if clusterConsensus is not None:
            self._lClusterConsensus.append(clusterConsensus)

    ## Split a header of the cluster file into cluster name and sequence header
    #
    # The header must start with "<non-digit prefix><number>Mb<number>" followed
    # by a whitespace. The cluster name is the prefix concatenated with the
    # first number; the sequence header is the second space-separated field.
    #
    # @param header string header as found in the cluster file
    # @return tuple (clusterName, seqHeader)
    # @note ValueError is raised if the header does not match the expected format
    #
    def _getClusterNameAndSeqHeader(self, header):
        # Raw string: \D, \d and \s are regex classes, not string escapes
        m = re.match(r"(\D*)(\d+)Mb\d+\s.*", header)
        if m is None:
            raise ValueError("Unexpected cluster header format: '%s'" % header)
        clusterName = m.group(1) + m.group(2)
        seqHeader = header.split(" ")[1]
        return clusterName, seqHeader

    ## Fetch a sequence from the global BioseqDB, rename it and add it to a cluster
    #
    # @param iBioseqDBAllCluster BioseqDB instance holding all the sequences of the cluster file
    # @param firstHeader string header used to fetch the sequence in iBioseqDBAllCluster
    # @param seqHeader string new header given to the fetched sequence
    # @param clusterConsensus BioseqDB instance to which the sequence is added
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Get the number (1-based position in the collection) of the first cluster containing a sequence
    #
    # @param seqName string header of the sequence to look for
    # @return integer cluster number, or None if no cluster contains seqName
    #
    def getNumClusterForAConsensus(self, seqName):
        for nbCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
            if seqName in bioseqDB.getHeaderList():
                return nbCluster
        return None

    ## Get the size of a cluster, as reported by its BioseqDB
    #
    # @param numCluster integer cluster number, starting at 1
    # @return value of getSize() for the corresponding BioseqDB
    # @note IndexError is raised if numCluster is not a valid cluster number
    #
    def getNumConsensusInCluster(self, numCluster):
        # Without this guard, 0 or a negative number would silently wrap
        # around to a cluster at the end of the list
        if numCluster < 1:
            raise IndexError("cluster number must be >= 1, got %s" % numCluster)
        return self._lClusterConsensus[numCluster - 1].getSize()