Mercurial > repos > urgi-team > teiso
comparison TEisotools-1.1.a/commons/core/coord/MapUtils.py @ 16:836ce3d9d47a draft default tip
Uploaded
author | urgi-team |
---|---|
date | Thu, 21 Jul 2016 07:42:47 -0400 |
parents | 255c852351c5 |
children |
comparison
equal
deleted
inserted
replaced
15:255c852351c5 | 16:836ce3d9d47a |
---|---|
1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
2 # http://www.inra.fr | |
3 # http://urgi.versailles.inra.fr | |
4 # | |
5 # This software is governed by the CeCILL license under French law and | |
6 # abiding by the rules of distribution of free software. You can use, | |
7 # modify and/ or redistribute the software under the terms of the CeCILL | |
8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
9 # "http://www.cecill.info". | |
10 # | |
11 # As a counterpart to the access to the source code and rights to copy, | |
12 # modify and redistribute granted by the license, users are provided only | |
13 # with a limited warranty and the software's author, the holder of the | |
14 # economic rights, and the successive licensors have only limited | |
15 # liability. | |
16 # | |
17 # In this respect, the user's attention is drawn to the risks associated | |
18 # with loading, using, modifying and/or developing or reproducing the | |
19 # software by the user in light of its specific status of free software, | |
20 # that may mean that it is complicated to manipulate, and that also | |
21 # therefore means that it is reserved for developers and experienced | |
22 # professionals having in-depth computer knowledge. Users are therefore | |
23 # encouraged to load and test the software's suitability as regards their | |
24 # requirements in conditions enabling the security of their systems and/or | |
25 # data to be ensured and, more generally, to use and operate it in the | |
26 # same conditions as regards security. | |
27 # | |
28 # The fact that you are presently reading this means that you have had | |
29 # knowledge of the CeCILL license and that you accept its terms. | |
30 | |
31 import os | |
32 import sys | |
33 from commons.core.coord.Map import Map | |
34 from commons.core.coord.Set import Set | |
35 from commons.core.checker.CheckerUtils import CheckerUtils | |
36 | |
37 | |
## Static methods manipulating Map instances and Map files
#
# A Map file is read here as tab-separated text with one annotation per line:
# name, seqname, start, end (the length helpers read columns 3 and 4).
#
class MapUtils(object):

    ## Return a Map list from a Map file
    #
    # @param mapFile string name of a Map file
    # @return a list of Map instances, one per line, in file order
    #
    @staticmethod
    def getMapListFromFile(mapFile):
        lMaps = []
        with open(mapFile, "r") as mapF:
            for line in mapF:
                iMap = Map()
                iMap.setFromString(line)
                lMaps.append(iMap)
        return lMaps


    ## Return a list of Map instances sorted in increasing order according to the min, then the max, and finally their initial order
    #
    # @param lMaps list of Map instances
    # @return a new sorted list (the input list is left untouched)
    #
    @staticmethod
    def getMapListSortedByIncreasingMinThenMax(lMaps):
        # sorted() is stable, so ties keep their initial relative order
        return sorted(lMaps, key = lambda iMap: (iMap.getMin(), iMap.getMax()))


    ## Return a list of Map instances sorted in increasing order according to the name, then the seqname, then the min, then the max
    #
    # @param lMaps list of Map instances
    # @return a new sorted list (the input list is left untouched)
    #
    @staticmethod
    def getMapListSortedByIncreasingNameThenSeqnameThenMinThenMax(lMaps):
        return sorted(lMaps, key = lambda iMap: (iMap.getName(), iMap.getSeqname(), iMap.getMin(), iMap.getMax()))


    ## Return a dictionary which keys are Map names and values the corresponding Map instances
    #
    # Exact duplicates of an already-seen Map are tolerated; two different
    # Maps sharing one name abort the program with exit status 1.
    #
    # @param mapFile string input map file name
    # @return a dictionary name -> Map instance
    #
    @staticmethod
    def getDictPerNameFromMapFile(mapFile):
        dName2Maps = {}
        with open(mapFile) as mapFileHandler:
            for line in mapFileHandler:
                iMap = Map()
                iMap.setFromString(line, "\t")
                # 'in' instead of dict.has_key(), which no longer exists in Python 3
                if iMap.name not in dName2Maps:
                    dName2Maps[iMap.name] = iMap
                elif iMap != dName2Maps[iMap.name]:
                    msg = "ERROR: in file '%s' two different Map instances have the same name '%s'" % (mapFile, iMap.name)
                    sys.stderr.write("%s\n" % msg)
                    sys.exit(1)
        return dName2Maps


    ## Build a Set instance carrying the name, seqname, start and end of a Map
    #
    # @param iMap Map instance to copy from
    # @param identifier integer stored as the Set id
    # @return a new Set instance
    #
    @staticmethod
    def _buildSetFromMap(iMap, identifier):
        iSet = Set()
        iSet.id = identifier
        iSet.name = iMap.getName()
        iSet.seqname = iMap.getSeqname()
        iSet.start = iMap.getStart()
        iSet.end = iMap.getEnd()
        return iSet


    ## Give a list of Set instances from a list of Map instances
    #
    # @param lMaps list of Map instances
    # @return lSets list of Set instances, with ids numbered from 1 in input order
    #
    @staticmethod
    def mapList2SetList(lMaps):
        return [MapUtils._buildSetFromMap(iMap, rank) for rank, iMap in enumerate(lMaps, 1)]


    ## Merge the Map instances in a Map file using 'mapOp'
    #
    # When 'mapOp' is not in the user path, a warning is written on stderr and
    # nothing else is done. When 'mapOp' fails, an error is written on stderr
    # and the program exits with status 1.
    #
    # @param mapFile string input map file name
    # @param outFile string output file name; receives the '<mapFile>.merge' file
    #
    @staticmethod
    def mergeCoordsInFile(mapFile, outFile):
        if not CheckerUtils.isExecutableInUserPath("mapOp"):
            msg = "WARNING: can't find executable 'mapOp'"
            sys.stderr.write("%s\n" % msg)
            return

        # local import: keeps this method self-contained
        import subprocess

        # An argument list (no shell) keeps file names containing spaces or
        # shell metacharacters intact. stdout of mapOp is discarded; its stderr
        # is left attached to the caller's stderr so that failures stay visible.
        with open(os.devnull, "w") as devNull:
            returnStatus = subprocess.call(["mapOp", "-q", mapFile, "-m"], stdout = devNull)
        if returnStatus != 0:
            sys.stderr.write("ERROR: mapOp returned %i\n" % returnStatus)
            sys.exit(1)
        # the '<mapFile>.merge' file is expected only after a successful mapOp run
        os.rename("%s.merge" % mapFile, outFile)


    ## Return a dictionary which keys are Map seqnames and values the lists of corresponding Map instances
    #
    # @param mapFile string input map file name
    # @return a dictionary seqname -> list of Map instances, in file order
    #
    @staticmethod
    def getDictPerSeqNameFromMapFile(mapFile):
        dSeqName2Maps = {}
        with open(mapFile) as mapFileHandler:
            for line in mapFileHandler:
                iMap = Map()
                iMap.setFromString(line, "\t")
                dSeqName2Maps.setdefault(iMap.seqname, []).append(iMap)
        return dSeqName2Maps


    ## Convert a Map file into a Set file
    #
    # Set ids are the 1-based line numbers of the Map file.
    #
    # @param mapFileName string input map file name
    # @param setFileName string output set file name (default: '<mapFileName>.set')
    #
    @staticmethod
    def convertMapFileIntoSetFile(mapFileName, setFileName = ""):
        if not setFileName:
            setFileName = "%s.set" % mapFileName
        # 'with' closes both handles even if parsing or writing raises
        with open(setFileName, "w") as setFileHandler:
            with open(mapFileName, "r") as mapFileHandler:
                for count, line in enumerate(mapFileHandler, 1):
                    iMap = Map()
                    iMap.setFromString(line)
                    MapUtils._buildSetFromMap(iMap, count).write(setFileHandler)


    ## Write Map instances contained in the given list
    #
    # @param lMaps list of Map instances
    # @param fileName a file name
    # @param mode the open mode of the file '"w"' or '"a"'
    #
    @staticmethod
    def writeListInFile(lMaps, fileName, mode = "w"):
        # 'with' closes the handle even if one of the write() calls raises
        with open(fileName, mode) as fileHandler:
            for iMap in lMaps:
                iMap.write(fileHandler)


    ## Yield the length of each annotation of a map file
    #
    # Blank lines are skipped. The length is abs(end - start) + 1 so that
    # annotations written with start > end get the same length as their
    # start <= end counterpart.
    #
    # @param mapFileName string input map file name
    #
    @staticmethod
    def _iterAnnotationLengths(mapFileName):
        with open(mapFileName) as fH:
            for line in fH:
                if not line.strip():
                    continue
                lFields = line.split('\t')
                start = int(lFields[2])
                end = int(lFields[3])
                yield abs(end - start) + 1


    ## Get the length of the shortest annotation in map file
    #
    # @param mapFileName string input map file name
    # @return the smallest annotation length
    # @throw ValueError if the file contains no annotation
    #
    @staticmethod
    def getMinLengthOfMapFile(mapFileName):
        minLength = None
        for length in MapUtils._iterAnnotationLengths(mapFileName):
            if minLength is None or length < minLength:
                minLength = length
        if minLength is None:
            # same exception type as min() on an empty list, with a clearer message
            raise ValueError("no annotation found in map file '%s'" % mapFileName)
        return minLength


    ## Get the length of the longest annotation in map file
    #
    # @param mapFileName string input map file name
    # @return the largest annotation length, 0 if the file contains no annotation
    #
    @staticmethod
    def getMaxLengthOfMapFile(mapFileName):
        maxLength = 0
        for length in MapUtils._iterAnnotationLengths(mapFileName):
            maxLength = max(maxLength, length)
        return maxLength