6
|
1 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
2 # http://www.inra.fr
|
|
3 # http://urgi.versailles.inra.fr
|
|
4 #
|
|
5 # This software is governed by the CeCILL license under French law and
|
|
6 # abiding by the rules of distribution of free software. You can use,
|
|
7 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
8 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
9 # "http://www.cecill.info".
|
|
10 #
|
|
11 # As a counterpart to the access to the source code and rights to copy,
|
|
12 # modify and redistribute granted by the license, users are provided only
|
|
13 # with a limited warranty and the software's author, the holder of the
|
|
14 # economic rights, and the successive licensors have only limited
|
|
15 # liability.
|
|
16 #
|
|
17 # In this respect, the user's attention is drawn to the risks associated
|
|
18 # with loading, using, modifying and/or developing or reproducing the
|
|
19 # software by the user in light of its specific status of free software,
|
|
20 # that may mean that it is complicated to manipulate, and that also
|
|
21 # therefore means that it is reserved for developers and experienced
|
|
22 # professionals having in-depth computer knowledge. Users are therefore
|
|
23 # encouraged to load and test the software's suitability as regards their
|
|
24 # requirements in conditions enabling the security of their systems and/or
|
|
25 # data to be ensured and, more generally, to use and operate it in the
|
|
26 # same conditions as regards security.
|
|
27 #
|
|
28 # The fact that you are presently reading this means that you have had
|
|
29 # knowledge of the CeCILL license and that you accept its terms.
|
|
30
|
|
31
|
|
32 import os
|
|
33 import sys
|
|
34 import shutil
|
|
35 from commons.core.coord.Align import Align
|
|
36
|
|
37
|
|
38 ## Static methods manipulating Align instances
|
|
39 #
|
|
class AlignUtils( object ):

    ## Return a list with Align instances from the given file
    #
    # @param inFile name of a file in the Align format
    # @return list of Align instances, one per line of the file, in file order
    #
    @staticmethod
    def getAlignListFromFile( inFile ):
        lAlignInstances = []
        # 'with' closes the handle even if a line cannot be parsed
        with open( inFile, "r" ) as inFileHandler:
            for line in inFileHandler:
                iAlign = Align()
                iAlign.setFromString( line )
                lAlignInstances.append( iAlign )
        return lAlignInstances


    ## Return a list with all the scores
    #
    # @param lAlignInstances: list of Align instances
    # @return list with the 'score' attribute of each instance, in input order
    #
    @staticmethod
    def getListOfScores( lAlignInstances ):
        return [ iAlign.score for iAlign in lAlignInstances ]


    ## Return a list with all the scores from the given file
    #
    # @param inFile name of a file in the Align format
    # @return list of integers read from the 8th tab-separated column of each line
    #
    # @note blank lines (empty or containing only whitespace) are skipped
    #
    @staticmethod
    def getScoreListFromFile( inFile ):
        lScores = []
        with open( inFile, "r" ) as inFileHandler:
            for line in inFileHandler:
                if line.strip() == "":
                    continue
                lScores.append( int( line.split( "\t" )[7] ) )
        return lScores


    ## for each line of a given Align file, write the coordinates on the query and the subject as two distinct lines in a Map file
    #
    # @param alignFile: name of the input Align file
    # @param mapFile: name of the output Map file (overwritten if it exists)
    #
    @staticmethod
    def convertAlignFileIntoMapFileWithQueriesAndSubjects( alignFile, mapFile ):
        # a single Align instance is re-used for every line
        iAlign = Align()
        with open( alignFile, "r" ) as alignFileHandler:
            with open( mapFile, "w" ) as mapFileHandler:
                for line in alignFileHandler:
                    iAlign.setFromString( line )
                    iMapQ, iMapS = iAlign.getMapsOfQueryAndSubject()
                    iMapQ.write( mapFileHandler )
                    iMapS.write( mapFileHandler )


    ## for each line of a given Align file, write the coordinates of the subject on the query as one line in a Map file
    #
    # @param alignFile: name of the input Align file
    # @param mapFile: name of the output Map file (overwritten if it exists)
    #
    @staticmethod
    def convertAlignFileIntoMapFileWithSubjectsOnQueries( alignFile, mapFile ):
        # a single Align instance is re-used for every line
        iAlign = Align()
        with open( alignFile, "r" ) as alignFileHandler:
            with open( mapFile, "w" ) as mapFileHandler:
                for line in alignFileHandler:
                    iAlign.setFromString( line )
                    iMapQ = iAlign.getSubjectAsMapOfQuery()
                    iMapQ.write( mapFileHandler )


    ## return a list of Align instances sorted in decreasing order according to their score, then their length on the query and finally their initial order
    #
    # @param lAligns: list of Align instances
    # @return new sorted list (the input list is left untouched)
    #
    # @note the keys are negated rather than inverted, so that null scores or
    #       lengths do not raise ZeroDivisionError and negative scores are
    #       ranked after positive ones; 'sorted' is stable, which keeps the
    #       initial order for ties
    #
    @staticmethod
    def getAlignListSortedByDecreasingScoreThenLength( lAligns ):
        def getSortKey( iAlign ):
            return ( -float( iAlign.getScore() ),
                     -float( iAlign.getLengthOnQuery() ) )
        return sorted( lAligns, key=getSortKey )


    ## Convert an Align file into a Path file
    #
    # @param alignFile string name of the input Align file
    # @param pathFile string name of the output Path file (overwritten if it exists)
    #
    # @note each output line is the input alignment prefixed by its 1-based rank in the input file
    #
    @staticmethod
    def convertAlignFileIntoPathFile( alignFile, pathFile ):
        iAlign = Align()
        with open( alignFile, "r" ) as alignFileHandler:
            with open( pathFile, "w" ) as pathFileHandler:
                for countAlign, line in enumerate( alignFileHandler, 1 ):
                    iAlign.setFromString( line, "\t" )
                    pathFileHandler.write( "%i\t%s\n" % ( countAlign, iAlign.toString() ) )


    ## Sort an Align file
    #
    # @param inFile string name of the input Align file
    # @param outFile string name of the output file (default = inFile + ".sort")
    #
    # @note the external program 'sort' is used with, in this order, columns 1
    #       and 4 compared as text, then columns 2, 3, 5, 6 and 8 compared as numbers
    # @note if 'sort' fails, a message is written on stderr and the interpreter
    #       exits with the exit code of 'sort'
    #
    @staticmethod
    def sortAlignFile( inFile, outFile="" ):
        # imported here because the header of this module does not import it
        import subprocess
        if outFile == "":
            outFile = "%s.sort" % ( inFile )
        prg = "sort"
        # the command is given as a list of arguments (no shell involved), hence
        # file names with spaces or shell meta-characters are handled properly
        lCmd = [ prg,
                 "-k", "1,1", "-k", "4,4",
                 "-k", "2,2n", "-k", "3,3n",
                 "-k", "5,5n", "-k", "6,6n",
                 "-k", "8,8n",
                 inFile ]
        with open( outFile, "w" ) as outFileHandler:
            # 'call' returns the real exit code of the program, whereas the raw
            # wait status of os.system (e.g. 512) could be truncated to 0 by sys.exit
            exitStatus = subprocess.call( lCmd, stdout=outFileHandler )
        if exitStatus != 0:
            msg = "ERROR: '%s' returned '%i'" % ( prg, exitStatus )
            sys.stderr.write( "%s\n" % ( msg ) )
            sys.exit( exitStatus )


    ## Write Align instances contained in the given list
    #
    # @param lAlign a list of Align instances
    # @param fileName name of the file to write the Align instances
    # @param mode the open mode of the file "w" or "a"
    #
    @staticmethod
    def writeListInFile( lAlign, fileName, mode="w" ):
        with open( fileName, mode ) as fileHandler:
            for iAlign in lAlign:
                iAlign.write( fileHandler )


    ## Split a list of Align instances according to the name of the query
    #
    # @param lInAlign list of align instances
    # @return lOutAlignList list of align instances lists
    #
    # @note the sub-lists are ordered by query name; within a sub-list the
    #       instances keep their relative order from the input list
    #
    @staticmethod
    def splitAlignListByQueryName( lInAlign ):
        lSortedAlign = sorted( lInAlign, key=lambda iAlign: iAlign.range_query.seqname )
        lOutAlignList = []
        previousQuery = None
        for iAlign in lSortedAlign:
            currentQuery = iAlign.range_query.seqname
            # open a new sub-list for the first instance and at each change of query
            if not lOutAlignList or currentQuery != previousQuery:
                lOutAlignList.append( [] )
                previousQuery = currentQuery
            lOutAlignList[-1].append( iAlign )
        return lOutAlignList


    ## Create an Align file from each list of Align instances in the input list
    #
    # @param lAlignList list of lists with Align instances
    # @param pattern string prefix of the file names
    # @param dirName string name of the output directory (default = current directory), created if needed
    #
    # @note files are named '<pattern>_<rank>.align' where the rank starts at 1
    #       and is zero-padded to the number of digits of the number of files
    # @note the current working directory of the process is never changed
    #
    @staticmethod
    def createAlignFiles( lAlignList, pattern, dirName="" ):
        if dirName != "":
            try:
                os.makedirs( dirName )
            except OSError:
                # an already existing directory is fine, anything else is a real error
                if not os.path.isdir( dirName ):
                    raise
        nbDigits = len( str( len( lAlignList ) ) )
        for countFile, lAlign in enumerate( lAlignList, 1 ):
            fileName = "%s_%s.align" % ( pattern, str( countFile ).zfill( nbDigits ) )
            # os.path.join returns 'fileName' unchanged when 'dirName' is empty
            AlignUtils.writeListInFile( lAlign, os.path.join( dirName, fileName ) )


    ## Return a list with Align instances sorted by query name, subject name, query start, query end and score
    #
    # @param lAligns list of Align instances
    # @return new sorted list (the input list is left untouched)
    #
    @staticmethod
    def sortList( lAligns ):
        def getSortKey( iAlign ):
            return ( iAlign.getQueryName(),
                     iAlign.getSubjectName(),
                     iAlign.getQueryStart(),
                     iAlign.getQueryEnd(),
                     iAlign.getScore() )
        return sorted( lAligns, key=getSortKey )


    ## Return a list after merging all overlapping Align instances
    #
    # @param lAligns list of Align instances
    # @return list of Align instances, none of them overlapping an instance kept before it
    #
    # @note 'merge' is called on the instances of the input list themselves,
    #       no copy is made
    #
    @staticmethod
    def mergeList( lAligns ):
        lMerged = []

        lSorted = AlignUtils.sortList( lAligns )

        for index, iAlign in enumerate( lSorted ):
            # absorb every later instance overlapping the current one
            for iNextAlign in lSorted[ index + 1: ]:
                if iAlign.isOverlapping( iNextAlign ):
                    iAlign.merge( iNextAlign )
            # fold the current instance into the ones already kept, if possible
            isAlreadyInList = False
            for newAlign in lMerged:
                if newAlign.isOverlapping( iAlign ):
                    isAlreadyInList = True
                    newAlign.merge( iAlign )
                    # NOTE(review): looks like a no-op unless Align defines '==';
                    # kept as is to preserve the historical behaviour
                    lMerged[ lMerged.index( newAlign ) ] = newAlign
            if not isAlreadyInList:
                lMerged.append( iAlign )

        return lMerged


    ## Merge all Align instance in a given Align file
    #
    # @param inFile string name of the input Align file
    # @param outFile string name of the output file (default = inFile + ".merged"), removed first if it exists
    #
    # @note the input is first sorted into the temporary file inFile + ".sorted",
    #       which is removed at the end, even in case of error
    # @note the grouping assumes that the lines sharing the same query and
    #       subject names are contiguous in the sorted file
    # @note nothing is written (the output file is not created) if the input is empty
    #
    @staticmethod
    def mergeFile( inFile, outFile="" ):
        if outFile == "":
            outFile = "%s.merged" % ( inFile )
        if os.path.exists( outFile ):
            os.remove( outFile )

        tmpFile = "%s.sorted" % ( inFile )
        try:
            AlignUtils.sortAlignFile( inFile, tmpFile )

            lAlignsOfCurrentPair = []
            prevPairQrySbj = None
            with open( tmpFile, "r" ) as tmpF:
                for line in tmpF:
                    iAlign = Align()
                    iAlign.setFromString( line )
                    # a tuple cannot be ambiguous, unlike the names joined with "_"
                    pairQrySbj = ( iAlign.getQueryName(), iAlign.getSubjectName() )
                    if pairQrySbj != prevPairQrySbj:
                        # new pair: merge and append to the output what was gathered so far
                        if lAlignsOfCurrentPair:
                            lMerged = AlignUtils.mergeList( lAlignsOfCurrentPair )
                            AlignUtils.writeListInFile( lMerged, outFile, "a" )
                        lAlignsOfCurrentPair = []
                        prevPairQrySbj = pairQrySbj
                    lAlignsOfCurrentPair.append( iAlign )

            # do not forget the last pair
            if lAlignsOfCurrentPair:
                lMerged = AlignUtils.mergeList( lAlignsOfCurrentPair )
                AlignUtils.writeListInFile( lMerged, outFile, "a" )
        finally:
            if os.path.exists( tmpFile ):
                os.remove( tmpFile )


    ## Update the scores of each match in the input file
    #
    # @param inFile string name of the input Align file
    # @param outFile string name of the output Align file (overwritten if it exists)
    #
    # @note the new score is the length on the query times the percentage of identity
    #
    @staticmethod
    def updateScoresInFile( inFile, outFile ):
        iAlign = Align()
        with open( inFile, "r" ) as inHandler:
            with open( outFile, "w" ) as outHandler:
                for line in inHandler:
                    # the instance is re-used, hence cleared before each line
                    iAlign.reset()
                    iAlign.setFromString( line, "\t" )
                    iAlign.updateScore()
                    iAlign.write( outHandler )
|