diff commons/core/sql/TableBinSetAdaptator.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/sql/TableBinSetAdaptator.py	Fri Jan 18 04:54:14 2013 -0500
@@ -0,0 +1,265 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+from commons.core.sql.TableSetAdaptator import TableSetAdaptator
+from commons.core.coord.SetUtils import SetUtils
+
+## Adaptator for Set tables with bin indexes
+#
+class TableBinSetAdaptator(TableSetAdaptator):
+   
+    ## constructor
+    #
+    # @param iDb DbMySql instance instance of DbMySql
+    # @param tableName string table name (default = "")
+    #
+    def __init__(self, iDb, tableName = ""):
+        TableSetAdaptator.__init__(self, iDb, tableName)
+        self._table_idx = "%s_idx" % (self._table)
+        
+    ## Insert a set instance in a set bin table
+    # 
+    # @param iSet set instance an instance of set object
+    # @param delayed boolean an insert delayed or not
+    #
+    def insASetInSetAndBinTable(self, iSet, delayed = False):
+        self.insert(iSet, delayed)
+        iSet.seqname = iSet.seqname.replace("\\", "\\\\")
+        iSet.name = iSet.name.replace("\\", "\\\\")
+        bin = iSet.getBin()
+        max = iSet.getMax()
+        min = iSet.getMin()
+        strand = iSet.isOnDirectStrand()
+        sql_prefix = ''
+        if delayed:
+            sql_prefix = 'INSERT DELAYED INTO '
+        else:
+            sql_prefix = 'INSERT INTO '
+        sql_cmd = sql_prefix + '%s VALUES (%d,%f,"%s",%d,%d,%d)'\
+                 %(self._table_idx,\
+                   iSet.id,\
+                   bin,\
+                   iSet.seqname,\
+                   min,\
+                   max,\
+                   strand)
+        self._iDb.execute(sql_cmd)
+
+    ## Delete set corresponding to a given identifier number in set and bin set table
+    # @param id integer identifier number
+    # @note old name was delSet_from_num
+    #
+    def deleteFromIdFromSetAndBinTable(self, id):
+        self.deleteFromId(id)
+        sql_cmd = 'delete from %s where path=%d' % (self._table_idx, id)
+        self._iDb.execute(sql_cmd)
+
+    ## Delete path corresponding to a given list of identifier number
+    #
+    # @param lId integer list list of identifier number
+    # @note old name was delSet_from_listnum
+    #
+    def deleteFromListIdFromSetAndBinTable(self, lId):
+        if lId != []:
+            self.deleteFromIdList(lId)
+            sql_cmd = 'delete from %s where path=%d' % (self._table_idx, lId[0])
+            for i in lId[1:]:
+                sql_cmd += " or path=%d" % (i)
+            self._iDb.execute(sql_cmd)
+
+    ## Join two set by changing id number of id1 and id2 path
+    # to the least of id1 and id2
+    #
+    # @param id1 integer id path number
+    # @param id2 integer id path number
+    # @return id integer new id
+    # @note old name was joinSet
+    #
+    def joinTwoSetsFromSetAndBinTable(self, id1, id2):
+        self.joinTwoSets(id1, id2)
+        if id1 < id2:
+            new_id = id1
+            old_id = id2
+        else:
+            new_id = id2
+            old_id = id1
+        sql_cmd = 'UPDATE %s SET path=%d WHERE path=%d'\
+                % (self._table_idx, new_id, old_id)
+        self._iDb.execute(sql_cmd)
+        return new_id
+    
+    ## Get a new id number from set bin table
+    #
+    def getNewId(self):
+        sql_cmd = 'select max(path) from %s;' % (self._table_idx)
+        self._iDb.execute(sql_cmd)
+        max_id = self._iDb.fetchall()[0][0]
+        if max_id != None:
+            return int(max_id)+1
+        else:
+            return 1
+        
+    ## Get a set list instance between start and end parameters
+    # using the bin scheme
+    #
+    # @param seqName reference seq name
+    # @param start start coordinate
+    # @param end end coordinate
+    # @return lSet set list
+    # @note old name was getSetList_from_qcoord
+    #
+    def getSetListFromQueryCoord(self, seqName, start, end):
+
+        min_coord = min(start,end)
+        max_coord = max(start,end)
+
+        sql_cmd = 'select path from %s where contig="%s" and ('\
+                 % (self._table + "_idx", seqName)
+        for i in xrange(8, 2, -1):
+            bin_lvl = pow(10, i)
+            if int(start/bin_lvl) == int(end/bin_lvl):       
+                bin = float(bin_lvl + (int(start / bin_lvl) / 1e10))
+                sql_cmd += 'bin=%f' % (bin)
+            else:
+                bin1 = float(bin_lvl + (int(start / bin_lvl) / 1e10))
+                bin2 = float(bin_lvl + (int(end  /bin_lvl) / 1e10))
+                sql_cmd += 'bin between %f and %f' % (bin1, bin2)
+            if bin_lvl != 1000:
+                sql_cmd += " or "
+
+        sql_cmd += ") and min<=%d and max>=%d" % (max_coord, min_coord);
+        self._iDb.execute(sql_cmd)
+        res = self._iDb.fetchall()
+        lId = []
+        for i in res:
+            lId.append(int(i[0]))
+        lSet = self.getSetListFromIdList(lId)
+        return lSet
+
+    ## Get a set list instances strictly included between start and end parameters
+    # using the bin scheme
+    #
+    # @param seqName reference seq name
+    # @param start start coordinate
+    # @param end end coordinate
+    # @return lSet set list
+    # @note old name was getInSetList_from_qcoord
+    # @warning the implementation has been changed : I added the two first lines
+    #
+    def getSetListStrictlyIncludedInQueryCoord(self, contig, start, end):
+        min_coord = min(start,end)
+        max_coord = max(start,end)
+        lSet = self.getSetListFromQueryCoord(contig, start, end)       
+        lSetStrictlyIncluded = []
+        for iSet in lSet:
+            if iSet.getMin() > min_coord and \
+               iSet.getMax() < max_coord:
+                lSetStrictlyIncluded.append(iSet)
+                            
+        return lSetStrictlyIncluded
+    
+    ## Get a list of the identifier Id contained in the table bin
+    #
+    # @return lId list of int list of identifier
+    # @note old name was getSet_num
+    #
+    def getIdList(self):
+        sql_cmd = 'select distinct path from %s;' % (self._table_idx)
+        self._iDb.execute(sql_cmd)
+        res = self._iDb.fetchall()
+        lId = []
+        for t in res:
+            lId.append(int(t[0]))
+        return lId
+    
+    ## Get a list of the query sequence name contained in the table bin
+    #
+    # @return lSeqName list of string list of query sequence name
+    # @note old name was getContig_name
+    #
+    def getSeqNameList(self):
+        sql_cmd = 'select distinct contig from %s;' % (self._table_idx)
+        self._iDb.execute(sql_cmd)
+        res = self._iDb.fetchall()
+        lSeqName = []
+        for t in res:
+            lSeqName.append(t[0])
+        return lSeqName
+    
+    ## Insert a Set list with the same new identifier in the table bin and set
+    #
+    # @note old name was insAddSetList
+    #
+    def insertListInSetAndBinTable(self, lSets, delayed = False):
+        id = self.getNewId()
+        SetUtils.changeIdInList( lSets, id )
+        for iSet in lSets:
+            self.insASetInSetAndBinTable(iSet, delayed)
+    
+    ## Insert a set list instances In table Bin and Set and merge all overlapping sets
+    #
+    # @param lSets reference seq name
+    # @note old name was insMergeSetList
+    #    
+    def insertListInSetAndBinTableAndMergeAllSets(self, lSets):
+        min, max = SetUtils.getListBoundaries(lSets)
+        oldLSet = self.getSetListFromQueryCoord(lSets[0].seqname, min, max)
+        oldQueryhash = SetUtils.getDictOfListsWithIdAsKey(oldLSet)
+        qhash = SetUtils.getDictOfListsWithIdAsKey(lSets)
+        for lNewSetById in qhash.values():
+            found = False
+            for currentId, oldLsetById in oldQueryhash.items():
+                if SetUtils.areSetsOverlappingBetweenLists(lNewSetById, oldLsetById):
+                    oldLsetById.extend(lNewSetById)
+                    oldLsetById = SetUtils.mergeSetsInList(oldLsetById)
+                    self.deleteFromIdFromSetAndBinTable(currentId)
+                    found = True
+            if not found:
+                self.insertListInSetAndBinTable(lNewSetById)
+            else:
+                id = self.getNewId()
+                SetUtils.changeIdInList(oldLsetById, id)
+                self.insertListInSetAndBinTable(oldLsetById)
+                
+    ## Insert a set list instances In table Bin and Set after removing all overlaps between database and lSets
+    #
+    # @param lSets reference seq name
+    # @note old name was insDiffSetList
+    #    
+    def insertListInSetAndBinTableAndRemoveOverlaps(self, lSets):
+        min, max = SetUtils.getListBoundaries(lSets)
+        oldLSet = self.getSetListFromQueryCoord(lSets[0].seqname, min, max)
+        oldQueryHash = SetUtils.getDictOfListsWithIdAsKey(oldLSet)
+        newQueryHash = SetUtils.getDictOfListsWithIdAsKey(lSets)
+        for lNewSetById in newQueryHash.values():
+            for lOldSetById in oldQueryHash.values():
+                if SetUtils.areSetsOverlappingBetweenLists(lNewSetById, lOldSetById):
+                    lNewSetById = SetUtils.getListOfSetWithoutOverlappingBetweenTwoListOfSet(lOldSetById, lNewSetById)
+            self.insertListInSetAndBinTable(lNewSetById)