annotate TEisotools-1.1.a/commons/core/seq/ClusterConsensusCollection.py @ 15:255c852351c5 draft

Uploaded
author urgi-team
date Thu, 21 Jul 2016 07:36:44 -0400
parents feef9a0db09d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
1 import re
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
2 from commons.core.seq.BioseqDB import BioseqDB
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
3
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
4 ## Record a collection of bioseqDB representing cluster consensus
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
5 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
class ClusterConsensusCollection(object):

    # State held by an instance:
    #   _clusterFileName   : path of the file handed to the constructor
    #   _lClusterConsensus : list of BioseqDB, one per cluster, in file order

    ## constructor
    #
    # @param clusterFileName string name of file containing the cluster of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        self._lClusterConsensus = []

    ## Two collections are equal when they have exactly the same type,
    # the same file name and equal lists of cluster BioseqDB
    #
    # @param o object to compare with
    # @return boolean
    #
    def __eq__(self, o):
        if type(o) is type(self):
            return (self._clusterFileName == o._clusterFileName
                    and self._lClusterConsensus == o._lClusterConsensus)
        return False

    ## Negation of __eq__
    #
    # @param o object to compare with
    # @return boolean
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Get the list of cluster BioseqDB (the internal list, not a copy)
    #
    # @return list of BioseqDB
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and append one BioseqDB per cluster to the collection
    #
    # Consecutive headers sharing the same cluster name go into the same
    # BioseqDB; a change of cluster name starts a new one. A file without
    # any header leaves the collection unchanged.
    #
    # NOTE(review): every call appends to the existing list, so calling
    # this method twice duplicates the clusters - confirm callers call it once.
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # 'with' guarantees the handle is closed even if read() raises
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)

        previousClusterName = None
        clusterConsensus = None
        for header in iBioseqDBAllCluster.getHeaderList():
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            if clusterConsensus is None or clusterName != previousClusterName:
                # first header, or the cluster changed: open a new BioseqDB
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(clusterName)
                self._lClusterConsensus.append(clusterConsensus)
                previousClusterName = clusterName
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)

    ## Split a header of the cluster file into cluster name and sequence header
    #
    # The header must start with "<prefix><clusterNumber>Mb<digits>" followed
    # by a whitespace, where <prefix> contains no digit. The cluster name is
    # <prefix><clusterNumber>; the sequence header is the second
    # space-separated field of the header.
    #
    # @param header string full header as found in the cluster file
    # @return tuple (clusterName, seqHeader)
    # @note raises AttributeError when the header does not match the pattern
    #
    def _getClusterNameAndSeqHeader(self, header):
        # raw string: "\D", "\d" and "\s" are regex classes, not string escapes
        m = re.match(r"(\D*)(\d+)Mb\d+\s.*", header)
        clusterName = m.group(1) + m.group(2)
        seqHeader = header.split(" ")[1]
        return clusterName, seqHeader

    ## Fetch a bioseq from the global BioseqDB, rename it and add it to a cluster
    #
    # The fetched bioseq object itself gets its header replaced by seqHeader
    # before being added.
    #
    # @param iBioseqDBAllCluster BioseqDB holding all the sequences of the cluster file
    # @param firstHeader string header used to fetch the bioseq
    # @param seqHeader string new header given to the bioseq
    # @param clusterConsensus BioseqDB of the cluster receiving the bioseq
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Get the 1-based rank of the first cluster containing a given sequence
    #
    # @param seqName string sequence header to look for
    # @return int rank of the cluster (starting at 1), or None when no cluster contains seqName
    #
    def getNumClusterForAConsensus(self, seqName):
        for nbCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
            if seqName in bioseqDB.getHeaderList():
                return nbCluster
        return None

    ## Get the number of sequences in a cluster
    #
    # @param numCluster int 1-based rank of the cluster
    # @return int value returned by getSize() of that cluster
    # @note raises IndexError when numCluster is not in 1..number of clusters
    #
    def getNumConsensusInCluster(self, numCluster):
        # reject 0 and negatives explicitly: numCluster - 1 would otherwise
        # wrap around and silently address a cluster from the end of the list
        if numCluster < 1:
            raise IndexError("cluster number must be >= 1, got %r" % (numCluster,))
        return self._lClusterConsensus[numCluster - 1].getSize()
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
70
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
71