diff TEisotools-1.1.a/commons/core/seq/ClusterConsensusCollection.py @ 13:feef9a0db09d draft

Uploaded
author urgi-team
date Wed, 20 Jul 2016 09:04:42 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TEisotools-1.1.a/commons/core/seq/ClusterConsensusCollection.py	Wed Jul 20 09:04:42 2016 -0400
@@ -0,0 +1,71 @@
+import re
+from commons.core.seq.BioseqDB import BioseqDB
+
+## Record a collection of bioseqDB representing cluster consensus
+#
+class ClusterConsensusCollection(object):
+
+    ## constructor
+    #
+    # @param clusterFileName string name of file containing the cluster of consensus
+    #
+    def __init__(self, clusterFileName):
+        self._clusterFileName = clusterFileName
+        self._lClusterConsensus = []
+
+    def __eq__(self, o):
+        if type(o) is type(self):
+            return self._clusterFileName == o._clusterFileName and self._lClusterConsensus == o._lClusterConsensus
+        return False
+    
+    def __ne__(self, o):
+        return not self.__eq__(o)
+
+    def getLClusterConsensus(self):
+        return self._lClusterConsensus
+    
+    def fillCollection(self):
+        iBioseqDBAllCluster = BioseqDB()
+        fClusterFile = open(self._clusterFileName, "r")
+        iBioseqDBAllCluster.read(fClusterFile)
+        fClusterFile.close()
+        lHeader = iBioseqDBAllCluster.getHeaderList()
+        firstHeader = lHeader[0]
+        previousClusterName, seqHeader = self._getClusterNameAndSeqHeader(firstHeader)
+        clusterConsensus = BioseqDB()
+        clusterConsensus.setName(previousClusterName)
+        self._addBioseqInClusterConsensus(iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus)
+        for header in lHeader[1:]:
+            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
+            if clusterName != previousClusterName:
+                self._lClusterConsensus.append(clusterConsensus)
+                previousClusterName = clusterName
+                clusterConsensus = BioseqDB()
+                clusterConsensus.setName(previousClusterName)
+            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)
+        self._lClusterConsensus.append(clusterConsensus)
+                
+    def _getClusterNameAndSeqHeader(self, header):
+        m = re.match("(\D*)(\d+)Mb\d+\s.*", header)
+        clusterNumber = m.group(2)
+        clusterName = m.group(1) + clusterNumber
+        lPartsHeaderheader = header.split(" ")
+        seqHeader = lPartsHeaderheader[1]
+        return clusterName, seqHeader
+
+    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
+        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
+        ibioseq.setHeader(seqHeader)
+        clusterConsensus.add(ibioseq)
+        
+    def getNumClusterForAConsensus(self, seqName):
+        nbCluster = 1
+        for bioseqDB in self._lClusterConsensus:
+            if seqName in bioseqDB.getHeaderList():
+                return nbCluster
+            nbCluster += 1
+            
+    def getNumConsensusInCluster(self, numCluster):
+        return self._lClusterConsensus[numCluster - 1].getSize()
+
+