Mercurial > repos > urgi-team > teiso
diff TEisotools-1.1.a/commons/core/coord/MapUtils.py @ 13:feef9a0db09d draft
Uploaded
author | urgi-team |
---|---|
date | Wed, 20 Jul 2016 09:04:42 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TEisotools-1.1.a/commons/core/coord/MapUtils.py Wed Jul 20 09:04:42 2016 -0400 @@ -0,0 +1,223 @@ +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
import os
import sys
from commons.core.coord.Map import Map
from commons.core.coord.Set import Set
from commons.core.checker.CheckerUtils import CheckerUtils


## Static methods manipulating Map instances
#
# A Map file is read line by line; each line is handed to Map.setFromString().
# The two length helpers at the end of the class read the raw lines directly and
# expect tab-separated fields with the start in the 3rd column and the end in the 4th.
#
class MapUtils(object):

    ## Return a Map list from a Map file
    #
    # @param mapFile string name of a Map file
    # @return a list of Map instances, one per line, in file order
    #
    @staticmethod
    def getMapListFromFile(mapFile):
        lMaps = []
        with open(mapFile, "r") as mapF:
            for line in mapF:
                iMap = Map()
                iMap.setFromString(line)
                lMaps.append(iMap)
        return lMaps


    ## Return a list of Map instances sorted in increasing order according to the min, then the max, and finally their initial order
    #
    # @param lMaps list of Map instances
    # @return a new sorted list (the input list is not modified)
    #
    @staticmethod
    def getMapListSortedByIncreasingMinThenMax(lMaps):
        # sorted() is stable, hence ties keep their initial relative order
        return sorted(lMaps, key = lambda iMap: (iMap.getMin(), iMap.getMax()))


    ## Return a list of Map instances sorted in increasing order according to the name, then the seqname, then the min, then the max
    #
    # @param lMaps list of Map instances
    # @return a new sorted list (the input list is not modified)
    #
    @staticmethod
    def getMapListSortedByIncreasingNameThenSeqnameThenMinThenMax(lMaps):
        return sorted(lMaps, key = lambda iMap: (iMap.getName(), iMap.getSeqname(), iMap.getMin(), iMap.getMax()))


    ## Return a dictionary which keys are Map names and values the corresponding Map instances
    #
    # The same name may appear on several lines as long as the Map instances compare equal.
    # If two different Map instances share a name, an error is written on stderr and
    # the program exits with status 1.
    #
    # @param mapFile string input map file name
    # @return a dictionary {name: Map instance}
    #
    @staticmethod
    def getDictPerNameFromMapFile(mapFile):
        dName2Maps = {}
        with open(mapFile) as mapFileHandler:
            for line in mapFileHandler:
                iMap = Map()
                iMap.setFromString(line, "\t")
                if iMap.name not in dName2Maps:
                    dName2Maps[iMap.name] = iMap
                elif iMap != dName2Maps[iMap.name]:
                    msg = "ERROR: in file '%s' two different Map instances have the same name '%s'" % (mapFile, iMap.name)
                    sys.stderr.write("%s\n" % msg)
                    sys.exit(1)
        return dName2Maps


    ## Build a Set instance from the name, seqname, start and end of a Map instance
    #
    # @param iMap Map instance providing the name, seqname, start and end
    # @param identifier value stored in the 'id' attribute of the new Set instance
    # @return a new Set instance
    #
    @staticmethod
    def _getSetFromMap(iMap, identifier):
        iSet = Set()
        iSet.id = identifier
        iSet.name = iMap.getName()
        iSet.seqname = iMap.getSeqname()
        iSet.start = iMap.getStart()
        iSet.end = iMap.getEnd()
        return iSet


    ## Give a list of Set instances from a list of Map instances
    #
    # The Set identifiers are the 1-based ranks of the Map instances in the input list.
    #
    # @param lMaps list of Map instances
    # @return lSets list of Set instances
    #
    @staticmethod
    def mapList2SetList(lMaps):
        return [MapUtils._getSetFromMap(iMap, rank) for rank, iMap in enumerate(lMaps, 1)]


    ## Merge the Map instances in a Map file using 'mapOp'
    #
    # If 'mapOp' is not found in the user path, a warning is written on stderr and
    # nothing else is done. If 'mapOp' returns a non-zero status, an error is written
    # on stderr and the program exits with status 1.
    #
    # @param mapFile string input map file name
    # @param outFile string name given to the file '<mapFile>.merge' once 'mapOp' has succeeded
    #
    @staticmethod
    def mergeCoordsInFile(mapFile, outFile):
        if not CheckerUtils.isExecutableInUserPath("mapOp"):
            msg = "WARNING: can't find executable 'mapOp'"
            sys.stderr.write("%s\n" % msg)
            return

        # The shell applies redirections from left to right: stderr is first sent to
        # the current stdout, then stdout alone is discarded.
        # NOTE(review): the file name is inserted unquoted in the shell command, so a
        # name containing spaces or shell metacharacters will not work as expected.
        cmd = "mapOp"
        cmd += " -q %s" % mapFile
        cmd += " -m"
        cmd += " 2>&1 > /dev/null"
        returnStatus = os.system(cmd)
        if returnStatus != 0:
            # reported on stderr like the other error messages of this class
            sys.stderr.write("ERROR: mapOp returned %i\n" % returnStatus)
            sys.exit(1)
        # presumably 'mapOp -m' writes its result in '<mapFile>.merge'
        os.rename("%s.merge" % mapFile, outFile)


    ## Return a dictionary which keys are Map seqnames and values the corresponding Map instances
    #
    # @param mapFile string input map file name
    # @return a dictionary {seqname: list of Map instances, in file order}
    #
    @staticmethod
    def getDictPerSeqNameFromMapFile(mapFile):
        dSeqName2Maps = {}
        with open(mapFile) as mapFileHandler:
            for line in mapFileHandler:
                iMap = Map()
                iMap.setFromString(line, "\t")
                dSeqName2Maps.setdefault(iMap.seqname, []).append(iMap)
        return dSeqName2Maps


    ## Convert a Map file into a Set file
    #
    # The Set identifiers are the 1-based line numbers of the Map file.
    #
    # @param mapFileName string input map file name
    # @param setFileName string output set file name (default: '<mapFileName>.set')
    #
    @staticmethod
    def convertMapFileIntoSetFile(mapFileName, setFileName = ""):
        if not setFileName:
            setFileName = "%s.set" % mapFileName
        # a single Map instance is re-filled for each line
        iMap = Map()
        # the output file is opened first, and closed even if a line cannot be converted
        with open(setFileName, "w") as setFileHandler:
            with open(mapFileName, "r") as mapFileHandler:
                for count, line in enumerate(mapFileHandler, 1):
                    iMap.setFromString(line)
                    iSet = MapUtils._getSetFromMap(iMap, count)
                    iSet.write(setFileHandler)


    ## Write Map instances contained in the given list
    #
    # @param lMaps list of Map instances
    # @param fileName a file name
    # @param mode the open mode of the file '"w"' or '"a"'
    #
    @staticmethod
    def writeListInFile(lMaps, fileName, mode = "w"):
        # the file is closed even if one of the write() calls fails
        with open(fileName, mode) as fileHandler:
            for iMap in lMaps:
                iMap.write(fileHandler)


    ## Yield the length of each annotation in a map file
    #
    # Each line is split on tabs; the start is read from the 3rd field and the end
    # from the 4th one. The length is computed on the absolute difference so that
    # it stays positive when the start is greater than the end.
    #
    # @param mapFileName string input map file name
    # @return a generator of integers, one per line of the file
    #
    @staticmethod
    def _iterLengthsOfMapFile(mapFileName):
        with open(mapFileName) as fH:
            for line in fH:
                lFields = line.split('\t')
                start = int(lFields[2])
                end = int(lFields[3])
                yield abs(end - start) + 1


    ## Get the length of the shortest annotation in map file
    #
    # @param mapFileName string input map file name
    # @return the smallest annotation length
    # @note raise ValueError if the file contains no line
    #
    @staticmethod
    def getMinLengthOfMapFile(mapFileName):
        minLength = None
        for length in MapUtils._iterLengthsOfMapFile(mapFileName):
            if minLength is None or length < minLength:
                minLength = length
        if minLength is None:
            # same exception type as min() on an empty list, with an explicit message
            raise ValueError("no annotation found in map file '%s'" % mapFileName)
        return minLength


    ## Get the length of the longest annotation in map file
    #
    # @param mapFileName string input map file name
    # @return the largest annotation length, 0 if the file contains no line
    #
    @staticmethod
    def getMaxLengthOfMapFile(mapFileName):
        maxLength = 0
        for length in MapUtils._iterLengthsOfMapFile(mapFileName):
            maxLength = max(maxLength, length)
        return maxLength