Mercurial > repos > yufei-luo > s_mart
comparison smart_toolShed/commons/core/seq/ClusterConsensusCollection.py @ 0:e0f8dcca02ed
Uploaded the S-MART tool, a toolbox for managing RNA-Seq and ChIP-Seq data.
author | yufei-luo |
---|---|
date | Thu, 17 Jan 2013 10:52:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e0f8dcca02ed |
---|---|
1 import re | |
2 from commons.core.seq.BioseqDB import BioseqDB | |
3 | |
## Record a collection of BioseqDB instances, one per cluster of consensus
#
# Every header read from the cluster file must start with
# "<prefix><clusterNumber>Mb<memberNumber>" followed by whitespace and the
# real sequence header (illustrative example: "Cluster12Mb3 seqA ...").
# Consecutive headers sharing the same "<prefix><clusterNumber>" are stored
# in the same BioseqDB; clusters are kept in their order of appearance.
#
class ClusterConsensusCollection(object):

    ## Compiled pattern used to split a header of the cluster file
    #
    # group 1: cluster prefix (non-digit characters), group 2: cluster number,
    # group 3: first whitespace-delimited token following the "Mb<n>" tag.
    #
    _HEADER_PATTERN = re.compile(r"(\D*)(\d+)Mb\d+\s+(\S*)")

    ## constructor
    #
    # @param clusterFileName string name of file containing the cluster of consensus
    #
    def __init__(self, clusterFileName):
        self._clusterFileName = clusterFileName
        # list of BioseqDB, one per cluster; filled by fillCollection()
        self._lClusterConsensus = []

    ## Two collections are equal when they share the same file name and the same clusters
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented when o is not a ClusterConsensusCollection
    #
    def __eq__(self, o):
        if not isinstance(o, ClusterConsensusCollection):
            # let Python try the reflected comparison instead of failing on a missing attribute
            return NotImplemented
        return (self._clusterFileName == o._clusterFileName
                and self._lClusterConsensus == o._lClusterConsensus)

    ## Negation of __eq__ (Python 2 does not derive '!=' from __eq__)
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented when o is not a ClusterConsensusCollection
    #
    def __ne__(self, o):
        isEqual = self.__eq__(o)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    ## Give the list of clusters
    #
    # @return list of BioseqDB, one per cluster, in order of appearance in the file
    #
    def getLClusterConsensus(self):
        return self._lClusterConsensus

    ## Read the cluster file and append one BioseqDB per cluster to the collection
    #
    # A new cluster is started each time the cluster name extracted from a
    # header differs from the one of the previous header. A file without any
    # sequence leaves the collection unchanged.
    #
    # @note clusters are appended: calling this method twice records them twice
    #
    def fillCollection(self):
        iBioseqDBAllCluster = BioseqDB()
        # 'with' guarantees the file is closed even if reading fails
        with open(self._clusterFileName, "r") as fClusterFile:
            iBioseqDBAllCluster.read(fClusterFile)

        previousClusterName = None
        clusterConsensus = None
        for header in iBioseqDBAllCluster.getHeaderList():
            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
            if clusterConsensus is None or clusterName != previousClusterName:
                # first header, or the cluster name changed: open a new cluster
                clusterConsensus = BioseqDB()
                clusterConsensus.setName(clusterName)
                self._lClusterConsensus.append(clusterConsensus)
                previousClusterName = clusterName
            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)

    ## Split a header of the cluster file into cluster name and sequence header
    #
    # @param header string header such as "<prefix><clusterNumber>Mb<memberNumber> <seqHeader> ..."
    # @return tuple (clusterName, seqHeader) where clusterName is "<prefix><clusterNumber>"
    #         and seqHeader is the first token following the "Mb<memberNumber>" tag
    # @raise ValueError if the header does not follow the expected format
    #
    def _getClusterNameAndSeqHeader(self, header):
        m = self._HEADER_PATTERN.match(header)
        if m is None:
            raise ValueError("unexpected cluster header format: '%s'" % header)
        clusterName = m.group(1) + m.group(2)
        # taken from the match itself so that any whitespace separator is accepted
        seqHeader = m.group(3)
        return clusterName, seqHeader

    ## Fetch a sequence from the global bank, rename it and add it to a cluster
    #
    # @param iBioseqDBAllCluster BioseqDB holding all the sequences of the cluster file
    # @param firstHeader string header of the sequence to fetch in iBioseqDBAllCluster
    # @param seqHeader string new header given to the fetched sequence
    # @param clusterConsensus BioseqDB cluster receiving the sequence
    #
    # @note setHeader() is called on the object returned by fetch(); if fetch()
    #       returns the stored instance, the global bank is modified as well (to confirm)
    #
    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
        ibioseq.setHeader(seqHeader)
        clusterConsensus.add(ibioseq)

    ## Give the rank (starting at 1) of the first cluster containing a sequence
    #
    # @param seqName string header of the sequence to look for
    # @return integer rank of the cluster, None if no cluster contains seqName
    #
    def getNumClusterForAConsensus(self, seqName):
        for nbCluster, bioseqDB in enumerate(self._lClusterConsensus, 1):
            if seqName in bioseqDB.getHeaderList():
                return nbCluster
        return None

    ## Give the number of sequences recorded in a cluster
    #
    # @param numCluster integer rank of the cluster, starting at 1
    # @return value returned by getSize() on the corresponding BioseqDB
    # @raise IndexError if numCluster does not designate a recorded cluster
    #
    def getNumConsensusInCluster(self, numCluster):
        if numCluster < 1:
            # without this guard, 0 would silently address the last cluster
            raise IndexError("cluster number must be >= 1, got %s" % numCluster)
        return self._lClusterConsensus[numCluster - 1].getSize()
65 | |
66 |