Mercurial > repos > urgi-team > teiso
comparison TEisotools-1.1.a/commons/core/seq/ClusterConsensusCollection.py @ 13:feef9a0db09d draft
Uploaded
author | urgi-team |
---|---|
date | Wed, 20 Jul 2016 09:04:42 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
12:22b0494ec883 | 13:feef9a0db09d |
---|---|
1 import re | |
2 from commons.core.seq.BioseqDB import BioseqDB | |
3 | |
## Record a collection of bioseqDB representing cluster consensus
#
# Each element of the collection is one BioseqDB per cluster, named after the
# cluster. Clusters are numbered from 1, in the order they are stored.
#
class ClusterConsensusCollection(object):

    ## Expected header layout: optional non-digit prefix, cluster number,
    # "Mb", member number, one whitespace character, then the rest.
    # Compiled once, as a raw string so the backslash classes are not
    # treated as (invalid) string escapes.
    _HEADER_PATTERN = re.compile(r"(\D*)(\d+)Mb\d+\s.*")

    ## constructor
    #
    # @param clusterFileName string name of file containing the cluster of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        self._lClusterConsensus = []

    ## Two collections are equal when they have exactly the same type, the
    # same cluster file name and equal lists of cluster BioseqDB.
    #
    # @param o object to compare with
    # @return boolean
    #
    def __eq__(self, o):
        if type(o) is type(self):
            return self._clusterFileName == o._clusterFileName and self._lClusterConsensus == o._lClusterConsensus
        return False

    ## Negation of __eq__
    #
    # @param o object to compare with
    # @return boolean
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Give the internal list of cluster BioseqDB (the list itself, not a copy)
    #
    # @return list of BioseqDB, one per cluster
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and append one BioseqDB per cluster to the collection
    #
    # Headers are processed in the order given by BioseqDB.getHeaderList();
    # consecutive headers sharing the same cluster name go into the same
    # BioseqDB. A cluster name that reappears after a different one starts a
    # new BioseqDB. If the file yields no header, the collection is left
    # unchanged. Results are appended to the existing list, so calling this
    # method twice stores every cluster twice.
    #
    # @raise ValueError if a header does not follow the expected layout
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # "with" guarantees the handle is closed even if reading fails
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)

        previousClusterName = None
        clusterConsensus = None
        for header in iBioseqDBAllCluster.getHeaderList():
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            if clusterConsensus is None or clusterName != previousClusterName:
                # first header, or the cluster name changed: open a new cluster
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(clusterName)
                self._lClusterConsensus.append(clusterConsensus)
                previousClusterName = clusterName
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)

    ## Split a cluster member header into cluster name and sequence header
    #
    # The cluster name is the non-digit prefix followed by the cluster number
    # (e.g. "Cluster12" for "Cluster12Mb3 seq"). The sequence header is the
    # second field of the header when split on single spaces.
    #
    # @param header string full header as found in the cluster file
    # @return tuple (clusterName, seqHeader)
    # @raise ValueError if header does not follow the expected layout
    #
    def _getClusterNameAndSeqHeader(self, header):
        m = self._HEADER_PATTERN.match(header)
        if m is None:
            # explicit error instead of an AttributeError on None.group()
            raise ValueError("Unexpected cluster header format: '%s'" % header)
        clusterName = m.group(1) + m.group(2)
        seqHeader = header.split(" ")[1]
        return clusterName, seqHeader

    ## Fetch a sequence from the global BioseqDB, rename it and add it to a cluster
    #
    # @param iBioseqDBAllCluster BioseqDB holding all the sequences of the cluster file
    # @param firstHeader string header of the sequence to fetch (any header, not only the first one)
    # @param seqHeader string new header set on the fetched sequence
    # @param clusterConsensus BioseqDB of the cluster receiving the sequence
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        # NOTE(review): if fetch() returns the stored object rather than a copy,
        # this also renames the sequence inside iBioseqDBAllCluster - confirm.
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Give the number (starting at 1) of the first cluster containing a consensus
    #
    # @param seqName string header of the consensus to look for
    # @return int cluster number, or None if no cluster contains seqName
    #
    def getNumClusterForAConsensus(self, seqName):
        for nbCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
            if seqName in bioseqDB.getHeaderList():
                return nbCluster
        return None

    ## Give the number of consensus stored in a cluster
    #
    # @param numCluster int cluster number, starting at 1
    # @return the value of getSize() for that cluster
    # @raise IndexError if numCluster is lower than 1 or greater than the number of clusters
    #
    def getNumConsensusInCluster(self, numCluster):
        if numCluster < 1:
            # without this guard, 0 or a negative number would silently wrap
            # around and report a cluster counted from the end of the list
            raise IndexError("cluster number must be >= 1, got %s" % numCluster)
        return self._lClusterConsensus[numCluster - 1].getSize()
70 | |
71 |