comparison TEisotools-1.1.a/commons/core/seq/ClusterConsensusCollection.py @ 13:feef9a0db09d draft

Uploaded
author urgi-team
date Wed, 20 Jul 2016 09:04:42 -0400
parents
children
comparison
equal deleted inserted replaced
12:22b0494ec883 13:feef9a0db09d
1 import re
2 from commons.core.seq.BioseqDB import BioseqDB
3
## Record a collection of BioseqDB instances, one per cluster of consensus
#
# The clusters are built from a single file whose sequence headers have the
# form "<prefix><clusterNumber>Mb<memberNumber> <sequenceHeader> ...".
#
class ClusterConsensusCollection(object):

    # Compiled once for the whole class: group 1 is the non-digit prefix,
    # group 2 the cluster number; "Mb<digits>" must be followed by whitespace.
    _HEADER_PATTERN = re.compile(r"(\D*)(\d+)Mb\d+\s.*")

    ## Constructor
    #
    # @param clusterFileName string name of file containing the cluster of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        self._lClusterConsensus = []

    ## Two collections are equal when they have exactly the same type, the
    # same file name and equal cluster lists
    #
    # @param o object to compare with
    # @return boolean
    #
    def __eq__(self, o):
        if type(o) is type(self):
            return self._clusterFileName == o._clusterFileName and self._lClusterConsensus == o._lClusterConsensus
        return False

    ## Negation of __eq__
    #
    # @param o object to compare with
    # @return boolean
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Give the list of clusters recorded so far
    #
    # @return list of BioseqDB, in the order the clusters were met in the file
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and append one BioseqDB per run of consecutive
    # headers sharing the same cluster name
    #
    # An empty file leaves the collection unchanged.
    #
    # @raise ValueError if a header does not follow the expected format
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # 'with' guarantees the handle is released even if read() fails
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)

        clusterConsensus = None
        previousClusterName = None
        for header in iBioseqDBAllCluster.getHeaderList():
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            # A change of cluster name (or the very first header) opens a new cluster
            if clusterConsensus is None or clusterName != previousClusterName:
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(clusterName)
                self._lClusterConsensus.append(clusterConsensus)
                previousClusterName = clusterName
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)

    ## Split a header of the cluster file into cluster name and sequence header
    #
    # @param header string, e.g. "Cluster12Mb3 seqName ..."
    # @return tuple (clusterName, seqHeader), e.g. ("Cluster12", "seqName");
    #         seqHeader is the second space-separated field of the header
    # @raise ValueError if the header does not match the expected format
    #
    def _getClusterNameAndSeqHeader(self, header):
        m = self._HEADER_PATTERN.match(header)
        lPartsHeader = header.split(" ")
        # The pattern accepts any whitespace after "Mb<digits>", but the
        # sequence header is taken from a split on single spaces: check both.
        if m is None or len(lPartsHeader) < 2:
            raise ValueError("Cannot parse cluster name and sequence header from header %r" % (header,))
        clusterName = m.group(1) + m.group(2)
        return clusterName, lPartsHeader[1]

    ## Fetch a sequence from the global bank, rename it and add it to a cluster
    #
    # The fetched bioseq object itself is renamed (no copy is made here).
    #
    # @param iBioseqDBAllCluster BioseqDB holding all the sequences of the file
    # @param firstHeader string header of the sequence in iBioseqDBAllCluster
    # @param seqHeader string new header given to the sequence
    # @param clusterConsensus BioseqDB receiving the sequence
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Give the number (1-based) of the first cluster containing a consensus
    #
    # @param seqName string header of the consensus
    # @return integer cluster number, or None if no cluster contains seqName
    #
    def getNumClusterForAConsensus(self, seqName):
        for nbCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
            if seqName in bioseqDB.getHeaderList():
                return nbCluster
        return None

    ## Give the number of consensus recorded in a cluster
    #
    # @param numCluster integer cluster number (1-based)
    # @return integer size of the cluster
    # @raise IndexError if numCluster is lower than 1 or greater than the number of clusters
    #
    def getNumConsensusInCluster(self, numCluster):
        # Without this guard, 0 would wrap around to index -1 and silently
        # report the size of the last cluster.
        if numCluster < 1:
            raise IndexError("cluster number must be >= 1, got %r" % (numCluster,))
        return self._lClusterConsensus[numCluster - 1].getSize()
70
71