view smart_toolShed/commons/core/sql/TableBinSetAdaptator.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line source

# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software.  You can  use, 
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info". 
#
# As a counterpart to the access to the source code and  rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty  and the software's author,  the holder of the
# economic rights,  and the successive licensors  have only  limited
# liability. 
#
# In this respect, the user's attention is drawn to the risks associated
# with loading,  using,  modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean  that it is complicated to manipulate,  and  that  also
# therefore means  that it is reserved for developers  and  experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or 
# data to be ensured and,  more generally, to use and operate it in the 
# same conditions as regards security. 
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.

from commons.core.sql.TableSetAdaptator import TableSetAdaptator
from commons.core.coord.SetUtils import SetUtils

## Adaptator for Set tables with bin indexes
#
class TableBinSetAdaptator(TableSetAdaptator):
   
    ## constructor
    #
    # @param iDb DbMySql instance instance of DbMySql
    # @param tableName string table name (default = "")
    #
    def __init__(self, iDb, tableName = ""):
        TableSetAdaptator.__init__(self, iDb, tableName)
        self._table_idx = "%s_idx" % (self._table)
        
    ## Insert a set instance in a set bin table
    # 
    # @param iSet set instance an instance of set object
    # @param delayed boolean an insert delayed or not
    #
    def insASetInSetAndBinTable(self, iSet, delayed = False):
        self.insert(iSet, delayed)
        iSet.seqname = iSet.seqname.replace("\\", "\\\\")
        iSet.name = iSet.name.replace("\\", "\\\\")
        bin = iSet.getBin()
        max = iSet.getMax()
        min = iSet.getMin()
        strand = iSet.isOnDirectStrand()
        sql_prefix = ''
        if delayed:
            sql_prefix = 'INSERT DELAYED INTO '
        else:
            sql_prefix = 'INSERT INTO '
        sql_cmd = sql_prefix + '%s VALUES (%d,%f,"%s",%d,%d,%d)'\
                 %(self._table_idx,\
                   iSet.id,\
                   bin,\
                   iSet.seqname,\
                   min,\
                   max,\
                   strand)
        self._iDb.execute(sql_cmd)

    ## Delete set corresponding to a given identifier number in set and bin set table
    # @param id integer identifier number
    # @note old name was delSet_from_num
    #
    def deleteFromIdFromSetAndBinTable(self, id):
        self.deleteFromId(id)
        sql_cmd = 'delete from %s where path=%d' % (self._table_idx, id)
        self._iDb.execute(sql_cmd)

    ## Delete path corresponding to a given list of identifier number
    #
    # @param lId integer list list of identifier number
    # @note old name was delSet_from_listnum
    #
    def deleteFromListIdFromSetAndBinTable(self, lId):
        if lId != []:
            self.deleteFromIdList(lId)
            sql_cmd = 'delete from %s where path=%d' % (self._table_idx, lId[0])
            for i in lId[1:]:
                sql_cmd += " or path=%d" % (i)
            self._iDb.execute(sql_cmd)

    ## Join two set by changing id number of id1 and id2 path
    # to the least of id1 and id2
    #
    # @param id1 integer id path number
    # @param id2 integer id path number
    # @return id integer new id
    # @note old name was joinSet
    #
    def joinTwoSetsFromSetAndBinTable(self, id1, id2):
        self.joinTwoSets(id1, id2)
        if id1 < id2:
            new_id = id1
            old_id = id2
        else:
            new_id = id2
            old_id = id1
        sql_cmd = 'UPDATE %s SET path=%d WHERE path=%d'\
                % (self._table_idx, new_id, old_id)
        self._iDb.execute(sql_cmd)
        return new_id
    
    ## Get a new id number from set bin table
    #
    def getNewId(self):
        sql_cmd = 'select max(path) from %s;' % (self._table_idx)
        self._iDb.execute(sql_cmd)
        max_id = self._iDb.fetchall()[0][0]
        if max_id != None:
            return int(max_id)+1
        else:
            return 1
        
    ## Get a set list instance between start and end parameters
    # using the bin scheme
    #
    # @param seqName reference seq name
    # @param start start coordinate
    # @param end end coordinate
    # @return lSet set list
    # @note old name was getSetList_from_qcoord
    #
    def getSetListFromQueryCoord(self, seqName, start, end):

        min_coord = min(start,end)
        max_coord = max(start,end)

        sql_cmd = 'select path from %s where contig="%s" and ('\
                 % (self._table + "_idx", seqName)
        for i in xrange(8, 2, -1):
            bin_lvl = pow(10, i)
            if int(start/bin_lvl) == int(end/bin_lvl):       
                bin = float(bin_lvl + (int(start / bin_lvl) / 1e10))
                sql_cmd += 'bin=%f' % (bin)
            else:
                bin1 = float(bin_lvl + (int(start / bin_lvl) / 1e10))
                bin2 = float(bin_lvl + (int(end  /bin_lvl) / 1e10))
                sql_cmd += 'bin between %f and %f' % (bin1, bin2)
            if bin_lvl != 1000:
                sql_cmd += " or "

        sql_cmd += ") and min<=%d and max>=%d" % (max_coord, min_coord);
        self._iDb.execute(sql_cmd)
        res = self._iDb.fetchall()
        lId = []
        for i in res:
            lId.append(int(i[0]))
        lSet = self.getSetListFromIdList(lId)
        return lSet

    ## Get a set list instances strictly included between start and end parameters
    # using the bin scheme
    #
    # @param seqName reference seq name
    # @param start start coordinate
    # @param end end coordinate
    # @return lSet set list
    # @note old name was getInSetList_from_qcoord
    # @warning the implementation has been changed : I added the two first lines
    #
    def getSetListStrictlyIncludedInQueryCoord(self, contig, start, end):
        min_coord = min(start,end)
        max_coord = max(start,end)
        lSet = self.getSetListFromQueryCoord(contig, start, end)       
        lSetStrictlyIncluded = []
        for iSet in lSet:
            if iSet.getMin() > min_coord and \
               iSet.getMax() < max_coord:
                lSetStrictlyIncluded.append(iSet)
                            
        return lSetStrictlyIncluded
    
    ## Get a list of the identifier Id contained in the table bin
    #
    # @return lId list of int list of identifier
    # @note old name was getSet_num
    #
    def getIdList(self):
        sql_cmd = 'select distinct path from %s;' % (self._table_idx)
        self._iDb.execute(sql_cmd)
        res = self._iDb.fetchall()
        lId = []
        for t in res:
            lId.append(int(t[0]))
        return lId
    
    ## Get a list of the query sequence name contained in the table bin
    #
    # @return lSeqName list of string list of query sequence name
    # @note old name was getContig_name
    #
    def getSeqNameList(self):
        sql_cmd = 'select distinct contig from %s;' % (self._table_idx)
        self._iDb.execute(sql_cmd)
        res = self._iDb.fetchall()
        lSeqName = []
        for t in res:
            lSeqName.append(t[0])
        return lSeqName
    
    ## Insert a Set list with the same new identifier in the table bin and set
    #
    # @note old name was insAddSetList
    #
    def insertListInSetAndBinTable(self, lSets, delayed = False):
        id = self.getNewId()
        SetUtils.changeIdInList( lSets, id )
        for iSet in lSets:
            self.insASetInSetAndBinTable(iSet, delayed)
    
    ## Insert a set list instances In table Bin and Set and merge all overlapping sets
    #
    # @param lSets reference seq name
    # @note old name was insMergeSetList
    #    
    def insertListInSetAndBinTableAndMergeAllSets(self, lSets):
        min, max = SetUtils.getListBoundaries(lSets)
        oldLSet = self.getSetListFromQueryCoord(lSets[0].seqname, min, max)
        oldQueryhash = SetUtils.getDictOfListsWithIdAsKey(oldLSet)
        qhash = SetUtils.getDictOfListsWithIdAsKey(lSets)
        for lNewSetById in qhash.values():
            found = False
            for currentId, oldLsetById in oldQueryhash.items():
                if SetUtils.areSetsOverlappingBetweenLists(lNewSetById, oldLsetById):
                    oldLsetById.extend(lNewSetById)
                    oldLsetById = SetUtils.mergeSetsInList(oldLsetById)
                    self.deleteFromIdFromSetAndBinTable(currentId)
                    found = True
            if not found:
                self.insertListInSetAndBinTable(lNewSetById)
            else:
                id = self.getNewId()
                SetUtils.changeIdInList(oldLsetById, id)
                self.insertListInSetAndBinTable(oldLsetById)
                
    ## Insert a set list instances In table Bin and Set after removing all overlaps between database and lSets
    #
    # @param lSets reference seq name
    # @note old name was insDiffSetList
    #    
    def insertListInSetAndBinTableAndRemoveOverlaps(self, lSets):
        min, max = SetUtils.getListBoundaries(lSets)
        oldLSet = self.getSetListFromQueryCoord(lSets[0].seqname, min, max)
        oldQueryHash = SetUtils.getDictOfListsWithIdAsKey(oldLSet)
        newQueryHash = SetUtils.getDictOfListsWithIdAsKey(lSets)
        for lNewSetById in newQueryHash.values():
            for lOldSetById in oldQueryHash.values():
                if SetUtils.areSetsOverlappingBetweenLists(lNewSetById, lOldSetById):
                    lNewSetById = SetUtils.getListOfSetWithoutOverlappingBetweenTwoListOfSet(lOldSetById, lNewSetById)
            self.insertListInSetAndBinTable(lNewSetById)