Mercurial > repos > urgi-team > teiso
view TEisotools-1.0/commons/core/coord/MapUtils.py @ 6:20ec0d14798e draft
Uploaded
author | urgi-team |
---|---|
date | Wed, 20 Jul 2016 05:00:24 -0400 |
parents | |
children |
line wrap: on
line source
# Copyright INRA (Institut National de la Recherche Agronomique) # http://www.inra.fr # http://urgi.versailles.inra.fr # # This software is governed by the CeCILL license under French law and # abiding by the rules of distribution of free software. You can use, # modify and/ or redistribute the software under the terms of the CeCILL # license as circulated by CEA, CNRS and INRIA at the following URL # "http://www.cecill.info". # # As a counterpart to the access to the source code and rights to copy, # modify and redistribute granted by the license, users are provided only # with a limited warranty and the software's author, the holder of the # economic rights, and the successive licensors have only limited # liability. # # In this respect, the user's attention is drawn to the risks associated # with loading, using, modifying and/or developing or reproducing the # software by the user in light of its specific status of free software, # that may mean that it is complicated to manipulate, and that also # therefore means that it is reserved for developers and experienced # professionals having in-depth computer knowledge. Users are therefore # encouraged to load and test the software's suitability as regards their # requirements in conditions enabling the security of their systems and/or # data to be ensured and, more generally, to use and operate it in the # same conditions as regards security. # # The fact that you are presently reading this means that you have had # knowledge of the CeCILL license and that you accept its terms. 
import os
import sys
from commons.core.coord.Map import Map
from commons.core.coord.Set import Set
from commons.core.checker.CheckerUtils import CheckerUtils


## Static methods manipulating Map instances
#
class MapUtils( object ):

    ## Return a Map list from a Map file
    #
    # Each line of the file is handed to Map.setFromString() with its default separator.
    #
    # @param mapFile string name of a Map file
    # @return a list of Map instances, one per line, in file order
    #
    @staticmethod
    def getMapListFromFile(mapFile):
        lMaps = []
        with open(mapFile, "r") as mapF:
            for line in mapF:
                iMap = Map()
                iMap.setFromString(line)
                lMaps.append(iMap)
        return lMaps


    ## Return a list of Map instances sorted in increasing order according to the min, then the max, and finally their initial order
    #
    # @param lMaps list of Map instances
    # @return a new sorted list (the input list is left untouched)
    #
    @staticmethod
    def getMapListSortedByIncreasingMinThenMax(lMaps):
        # sorted() is stable, hence ties keep their initial relative order
        return sorted(lMaps, key = lambda iMap: (iMap.getMin(), iMap.getMax()))


    ## Return a list of Map instances sorted in increasing order according to the name, then the seqname, then the min, then the max
    #
    # @param lMaps list of Map instances
    # @return a new sorted list (the input list is left untouched)
    #
    @staticmethod
    def getMapListSortedByIncreasingNameThenSeqnameThenMinThenMax(lMaps):
        return sorted(lMaps, key = lambda iMap: (iMap.getName(), iMap.getSeqname(), iMap.getMin(), iMap.getMax()))


    ## Return a dictionary which keys are Map names and values the corresponding Map instances
    #
    # Lines are parsed with a tabulation as separator. A name may appear several
    # times as long as all the corresponding Map instances compare equal; otherwise
    # an error is written on stderr and the program exits with status 1.
    #
    # @param mapFile string input map file name
    # @return a dictionary name -> Map instance
    #
    @staticmethod
    def getDictPerNameFromMapFile(mapFile):
        dName2Maps = {}
        with open(mapFile) as mapFileHandler:
            for line in mapFileHandler:
                iMap = Map()
                iMap.setFromString(line, "\t")
                # 'in' replaces dict.has_key(), which no longer exists in Python 3
                if iMap.name not in dName2Maps:
                    dName2Maps[iMap.name] = iMap
                elif iMap != dName2Maps[iMap.name]:
                    msg = "ERROR: in file '%s' two different Map instances have the same name '%s'" % (mapFile, iMap.name)
                    sys.stderr.write("%s\n" % msg)
                    sys.exit(1)
        return dName2Maps


    ## Give a list of Set instances from a list of Map instances
    #
    # The identifier of each Set is the 1-based rank of its Map in the input list.
    #
    # @param lMaps list of Map instances
    # @return lSets list of Set instances
    #
    @staticmethod
    def mapList2SetList(lMaps):
        lSets = []
        for rank, iMap in enumerate(lMaps, 1):
            iSet = Set()
            iSet.id = rank
            iSet.name = iMap.getName()
            iSet.seqname = iMap.getSeqname()
            iSet.start = iMap.getStart()
            iSet.end = iMap.getEnd()
            lSets.append(iSet)
        return lSets


    ## Merge the Map instances in a Map file using 'mapOp'
    #
    # If 'mapOp' is not found in the user path, a warning is written on stderr and
    # nothing else is done. If 'mapOp' returns a non-zero status, an error is
    # printed and the program exits with status 1.
    #
    # @param mapFile string input map file name
    # @param outFile string output file name (the file '<mapFile>.merge' is renamed into it)
    #
    @staticmethod
    def mergeCoordsInFile(mapFile, outFile):
        if not CheckerUtils.isExecutableInUserPath("mapOp"):
            msg = "WARNING: can't find executable 'mapOp'"
            sys.stderr.write("%s\n" % msg)
            return
        # NOTE(review): mapFile is inserted unquoted into a shell command line,
        # so it must not contain spaces or shell metacharacters.
        # NOTE(review): with '2>&1 > /dev/null' only the stdout of mapOp is discarded,
        # its stderr is sent to the stdout of the caller - confirm this is intended.
        cmd = "mapOp -q %s -m 2>&1 > /dev/null" % mapFile
        returnStatus = os.system(cmd)
        if returnStatus != 0:
            # parenthesized single-argument form: same output with Python 2 and 3
            print("ERROR: mapOp returned %i" % returnStatus)
            sys.exit(1)
        os.rename("%s.merge" % mapFile, outFile)


    ## Return a dictionary which keys are Map seqnames and values the corresponding Map instances
    #
    # Lines are parsed with a tabulation as separator.
    #
    # @param mapFile string input map file name
    # @return a dictionary seqname -> list of Map instances, in file order
    #
    @staticmethod
    def getDictPerSeqNameFromMapFile(mapFile):
        dSeqName2Maps = {}
        with open(mapFile) as mapFileHandler:
            for line in mapFileHandler:
                iMap = Map()
                iMap.setFromString(line, "\t")
                # setdefault() replaces the Python-2-only has_key() test
                dSeqName2Maps.setdefault(iMap.seqname, []).append(iMap)
        return dSeqName2Maps


    ## Convert a Map file into a Set file
    #
    # The identifier of each Set is the 1-based number of the corresponding line.
    #
    # @param mapFileName string input map file name
    # @param setFileName string output set file name (default: '<mapFileName>.set')
    #
    @staticmethod
    def convertMapFileIntoSetFile(mapFileName, setFileName = ""):
        if not setFileName:
            setFileName = "%s.set" % mapFileName
        iMap = Map()
        # the input is opened first so that a missing map file does not leave an
        # empty set file behind; both handlers are closed even if an error occurs
        with open(mapFileName, "r") as mapFileHandler:
            with open(setFileName, "w") as setFileHandler:
                for count, line in enumerate(mapFileHandler, 1):
                    iMap.setFromString(line)
                    iSet = Set()
                    iSet.id = count
                    iSet.name = iMap.getName()
                    iSet.seqname = iMap.getSeqname()
                    iSet.start = iMap.getStart()
                    iSet.end = iMap.getEnd()
                    iSet.write(setFileHandler)


    ## Write Map instances contained in the given list
    #
    # @param lMaps list of Map instances
    # @param fileName a file name
    # @param mode the open mode of the file '"w"' or '"a"'
    #
    @staticmethod
    def writeListInFile(lMaps, fileName, mode = "w"):
        # 'with' closes the file even if one of the write() calls fails
        with open(fileName, mode) as fileHandler:
            for iMap in lMaps:
                iMap.write(fileHandler)


    ## Yield the length (end - start + 1) of each annotation of a map file
    #
    # Start and end are read from the third and fourth tab-separated fields.
    #
    # @param mapFileName string input map file name
    #
    @staticmethod
    def _iterLengthsFromMapFile(mapFileName):
        with open(mapFileName) as fH:
            for line in fH:
                lFields = line.split('\t')
                yield int(lFields[3]) - int(lFields[2]) + 1


    ## Get the length of the shortest annotation in map file
    #
    # @param mapFileName string input map file name
    # @return the smallest value of 'end - start + 1' over all lines
    # @note raise a ValueError if the file has no line
    #
    @staticmethod
    def getMinLengthOfMapFile(mapFileName):
        return min(MapUtils._iterLengthsFromMapFile(mapFileName))


    ## Get the length of the longest annotation in map file
    #
    # @param mapFileName string input map file name
    # @return the largest value of 'end - start + 1' over all lines, 0 if none is positive or the file is empty
    #
    @staticmethod
    def getMaxLengthOfMapFile(mapFileName):
        maxLength = 0
        for length in MapUtils._iterLengthsFromMapFile(mapFileName):
            maxLength = max(maxLength, length)
        return maxLength