Mercurial > repos > yufei-luo > s_mart
diff commons/core/sql/TableBinSetAdaptator.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/core/sql/TableBinSetAdaptator.py Fri Jan 18 04:54:14 2013 -0500 @@ -0,0 +1,265 @@ +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. + +from commons.core.sql.TableSetAdaptator import TableSetAdaptator +from commons.core.coord.SetUtils import SetUtils + +## Adaptator for Set tables with bin indexes +# +class TableBinSetAdaptator(TableSetAdaptator): + + ## constructor + # + # @param iDb DbMySql instance instance of DbMySql + # @param tableName string table name (default = "") + # + def __init__(self, iDb, tableName = ""): + TableSetAdaptator.__init__(self, iDb, tableName) + self._table_idx = "%s_idx" % (self._table) + + ## Insert a set instance in a set bin table + # + # @param iSet set instance an instance of set object + # @param delayed boolean an insert delayed or not + # + def insASetInSetAndBinTable(self, iSet, delayed = False): + self.insert(iSet, delayed) + iSet.seqname = iSet.seqname.replace("\\", "\\\\") + iSet.name = iSet.name.replace("\\", "\\\\") + bin = iSet.getBin() + max = iSet.getMax() + min = iSet.getMin() + strand = iSet.isOnDirectStrand() + sql_prefix = '' + if delayed: + sql_prefix = 'INSERT DELAYED INTO ' + else: + sql_prefix = 'INSERT INTO ' + sql_cmd = sql_prefix + '%s VALUES (%d,%f,"%s",%d,%d,%d)'\ + %(self._table_idx,\ + iSet.id,\ + bin,\ + iSet.seqname,\ + min,\ + max,\ + strand) + self._iDb.execute(sql_cmd) + + ## Delete set corresponding to a given identifier number in set and bin set table + # @param id integer identifier number + # @note old name was delSet_from_num + # + def deleteFromIdFromSetAndBinTable(self, id): + self.deleteFromId(id) + sql_cmd = 'delete from %s where path=%d' % (self._table_idx, id) + self._iDb.execute(sql_cmd) + + ## Delete path corresponding to a given list of identifier number + # + # @param lId integer list list of identifier number + # @note old name was delSet_from_listnum + # + def deleteFromListIdFromSetAndBinTable(self, lId): + if lId != []: + self.deleteFromIdList(lId) + sql_cmd = 'delete from %s where path=%d' % (self._table_idx, lId[0]) + for i in lId[1:]: + sql_cmd += " or path=%d" % (i) + self._iDb.execute(sql_cmd) + + ## Join two set by changing id number of id1 and id2 path + # to the least of id1 and id2 + # + # @param id1 integer id path number + # @param id2 integer id path number + # @return id integer new id + # @note old name was joinSet + # + def joinTwoSetsFromSetAndBinTable(self, id1, id2): + self.joinTwoSets(id1, id2) + if id1 < id2: + new_id = id1 + old_id = id2 + else: + new_id = id2 + old_id = id1 + sql_cmd = 'UPDATE %s SET path=%d WHERE path=%d'\ + % (self._table_idx, new_id, old_id) + self._iDb.execute(sql_cmd) + return new_id + + ## Get a new id number from set bin table + # + def getNewId(self): + sql_cmd = 'select max(path) from %s;' % (self._table_idx) + self._iDb.execute(sql_cmd) + max_id = self._iDb.fetchall()[0][0] + if max_id != None: + return int(max_id)+1 + else: + return 1 + + ## Get a set list instance between start and end parameters + # using the bin scheme + # + # @param seqName reference seq name + # @param start start coordinate + # @param end end coordinate + # @return lSet set list + # @note old name was getSetList_from_qcoord + # + def getSetListFromQueryCoord(self, seqName, start, end): + + min_coord = min(start,end) + max_coord = max(start,end) + + sql_cmd = 'select path from %s where contig="%s" and ('\ + % (self._table + "_idx", seqName) + for i in xrange(8, 2, -1): + bin_lvl = pow(10, i) + if int(start/bin_lvl) == int(end/bin_lvl): + bin = float(bin_lvl + (int(start / bin_lvl) / 1e10)) + sql_cmd += 'bin=%f' % (bin) + else: + bin1 = float(bin_lvl + (int(start / bin_lvl) / 1e10)) + bin2 = float(bin_lvl + (int(end /bin_lvl) / 1e10)) + sql_cmd += 'bin between %f and %f' % (bin1, bin2) + if bin_lvl != 1000: + sql_cmd += " or " + + sql_cmd += ") and min<=%d and max>=%d" % (max_coord, min_coord); + self._iDb.execute(sql_cmd) + res = self._iDb.fetchall() + lId = [] + for i in res: + lId.append(int(i[0])) + lSet = self.getSetListFromIdList(lId) + return lSet + + ## Get a set list instances strictly included between start and end parameters + # using the bin scheme + # + # @param seqName reference seq name + # @param start start coordinate + # @param end end coordinate + # @return lSet set list + # @note old name was getInSetList_from_qcoord + # @warning the implementation has been changed : I added the two first lines + # + def getSetListStrictlyIncludedInQueryCoord(self, contig, start, end): + min_coord = min(start,end) + max_coord = max(start,end) + lSet = self.getSetListFromQueryCoord(contig, start, end) + lSetStrictlyIncluded = [] + for iSet in lSet: + if iSet.getMin() > min_coord and \ + iSet.getMax() < max_coord: + lSetStrictlyIncluded.append(iSet) + + return lSetStrictlyIncluded + + ## Get a list of the identifier Id contained in the table bin + # + # @return lId list of int list of identifier + # @note old name was getSet_num + # + def getIdList(self): + sql_cmd = 'select distinct path from %s;' % (self._table_idx) + self._iDb.execute(sql_cmd) + res = self._iDb.fetchall() + lId = [] + for t in res: + lId.append(int(t[0])) + return lId + + ## Get a list of the query sequence name contained in the table bin + # + # @return lSeqName list of string list of query sequence name + # @note old name was getContig_name + # + def getSeqNameList(self): + sql_cmd = 'select distinct contig from %s;' % (self._table_idx) + self._iDb.execute(sql_cmd) + res = self._iDb.fetchall() + lSeqName = [] + for t in res: + lSeqName.append(t[0]) + return lSeqName + + ## Insert a Set list with the same new identifier in the table bin and set + # + # @note old name was insAddSetList + # + def insertListInSetAndBinTable(self, lSets, delayed = False): + id = self.getNewId() + SetUtils.changeIdInList( lSets, id ) + for iSet in lSets: + self.insASetInSetAndBinTable(iSet, delayed) + + ## Insert a set list instances In table Bin and Set and merge all overlapping sets + # + # @param lSets reference seq name + # @note old name was insMergeSetList + # + def insertListInSetAndBinTableAndMergeAllSets(self, lSets): + min, max = SetUtils.getListBoundaries(lSets) + oldLSet = self.getSetListFromQueryCoord(lSets[0].seqname, min, max) + oldQueryhash = SetUtils.getDictOfListsWithIdAsKey(oldLSet) + qhash = SetUtils.getDictOfListsWithIdAsKey(lSets) + for lNewSetById in qhash.values(): + found = False + for currentId, oldLsetById in oldQueryhash.items(): + if SetUtils.areSetsOverlappingBetweenLists(lNewSetById, oldLsetById): + oldLsetById.extend(lNewSetById) + oldLsetById = SetUtils.mergeSetsInList(oldLsetById) + self.deleteFromIdFromSetAndBinTable(currentId) + found = True + if not found: + self.insertListInSetAndBinTable(lNewSetById) + else: + id = self.getNewId() + SetUtils.changeIdInList(oldLsetById, id) + self.insertListInSetAndBinTable(oldLsetById) + + ## Insert a set list instances In table Bin and Set after removing all overlaps between database and lSets + # + # @param lSets reference seq name + # @note old name was insDiffSetList + # + def insertListInSetAndBinTableAndRemoveOverlaps(self, lSets): + min, max = SetUtils.getListBoundaries(lSets) + oldLSet = self.getSetListFromQueryCoord(lSets[0].seqname, min, max) + oldQueryHash = SetUtils.getDictOfListsWithIdAsKey(oldLSet) + newQueryHash = SetUtils.getDictOfListsWithIdAsKey(lSets) + for lNewSetById in newQueryHash.values(): + for lOldSetById in oldQueryHash.values(): + if SetUtils.areSetsOverlappingBetweenLists(lNewSetById, lOldSetById): + lNewSetById = SetUtils.getListOfSetWithoutOverlappingBetweenTwoListOfSet(lOldSetById, lNewSetById) + self.insertListInSetAndBinTable(lNewSetById)