13
|
1 import re
|
|
2 from commons.core.seq.BioseqDB import BioseqDB
|
|
3
|
|
## Record a collection of BioseqDB instances, one per cluster of consensus
#
class ClusterConsensusCollection(object):

    # Expected header layout: "<prefix><clusterNumber>Mb<memberNumber> <seqHeader>...".
    # Compiled once (raw string, so the backslashes reach the regex engine untouched).
    _HEADER_PATTERN = re.compile(r"(\D*)(\d+)Mb\d+\s.*")

    ## constructor
    #
    # @param clusterFileName string name of file containing the cluster of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        # One BioseqDB per cluster, in order of first appearance in the file.
        self._lClusterConsensus = []

    ## Two collections are equal when they have exactly the same type,
    # the same file name and equal lists of cluster BioseqDB.
    #
    # @param o object to compare with
    # @return boolean
    #
    def __eq__(self, o):
        if type(o) is type(self):
            return self._clusterFileName == o._clusterFileName and self._lClusterConsensus == o._lClusterConsensus
        return False

    ## Negation of __eq__
    #
    # @param o object to compare with
    # @return boolean
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Get the list of per-cluster BioseqDB
    #
    # @return list the internal list itself (not a copy)
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and append one BioseqDB per cluster to the collection
    #
    # Consecutive headers sharing the same cluster name are grouped together;
    # a new BioseqDB is started each time the cluster name changes.
    # Nothing is added when the file yields no header.
    # Results are appended to the existing list: the list is not reset first.
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # 'with' guarantees the file is closed even if read() raises.
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)

        previousClusterName = None
        clusterConsensus = None
        for header in iBioseqDBAllCluster.getHeaderList():
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            if clusterConsensus is None or clusterName != previousClusterName:
                # First header, or cluster boundary: open a new BioseqDB.
                # It is registered right away; sequences added afterwards are
                # visible through the same reference.
                previousClusterName = clusterName
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(clusterName)
                self._lClusterConsensus.append(clusterConsensus)
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)

    ## Split a clustered header into its cluster name and its sequence header
    #
    # The cluster name is the non-digit prefix followed by the cluster number
    # (the text before "Mb"); the sequence header is the second space-separated
    # field of the header.
    #
    # @param header string header such as "Cluster12Mb3 seqName ..."
    # @return tuple (clusterName, seqHeader)
    # @throws ValueError if the header does not follow the expected layout
    #
    def _getClusterNameAndSeqHeader(self, header):
        m = self._HEADER_PATTERN.match(header)
        if m is None:
            raise ValueError("header %r does not match '<name><number>Mb<number> <seqHeader>'" % (header,))
        clusterName = m.group(1) + m.group(2)
        seqHeader = header.split(" ")[1]
        return clusterName, seqHeader

    ## Fetch a sequence from the global BioseqDB, rename it and add it to a cluster
    #
    # @param iBioseqDBAllCluster BioseqDB holding all the sequences read from the cluster file
    # @param firstHeader string header under which the sequence is fetched
    # @param seqHeader string header given to the sequence before it is added
    # @param clusterConsensus BioseqDB receiving the sequence
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        # NOTE(review): setHeader is applied to whatever fetch() returns; if that
        # is not a copy, the sequence is renamed in iBioseqDBAllCluster too - confirm.
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Get the 1-based rank of the first cluster containing a given consensus
    #
    # @param seqName string header of the consensus to look for
    # @return int rank of the cluster (starting at 1), or None if no cluster contains it
    #
    def getNumClusterForAConsensus(self, seqName):
        for numCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
            if seqName in bioseqDB.getHeaderList():
                return numCluster
        return None

    ## Get the size of a cluster given its 1-based rank
    #
    # @param numCluster int rank of the cluster (starting at 1)
    # @return value of getSize() for that cluster's BioseqDB
    # @note numCluster=0 is not rejected: it indexes position -1, i.e. the last cluster
    #
    def getNumConsensusInCluster(self, numCluster):
        return self._lClusterConsensus[numCluster - 1].getSize()
|
|
70
|
|
71
|