annotate TEisotools-1.1.a/commons/core/coord/PathUtils.py @ 15:255c852351c5 draft

Uploaded
author urgi-team
date Thu, 21 Jul 2016 07:36:44 -0400
parents feef9a0db09d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
1 # Copyright INRA (Institut National de la Recherche Agronomique)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
2 # http://www.inra.fr
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
3 # http://urgi.versailles.inra.fr
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
4 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
9 # "http://www.cecill.info".
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
10 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
15 # liability.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
16 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
26 # same conditions as regards security.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
27 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
30
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
31
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
32 import os
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
33 import sys
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
34 import copy
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
35 from commons.core.coord.Map import Map
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
36 from commons.core.coord.Path import Path
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
37 from commons.core.coord.Align import Align
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
38 from commons.core.coord.Range import Range
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
39 from commons.core.coord.SetUtils import SetUtils
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
40 from commons.core.coord.AlignUtils import AlignUtils
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
41 from commons.core.checker.RepetException import RepetDataException
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
42
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
43 ## Static methods for the manipulation of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
44 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
45 class PathUtils ( object ):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
46
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
47 ## Change the identifier of each Path instance in the given list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
48 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
49 # @param lPaths list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
50 # @param newId new identifier
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
51 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def changeIdInList(lPaths, newId):
    """Overwrite, in place, the identifier of every Path instance in lPaths.

    @param lPaths list of Path instances (each one has its 'id' attribute rewritten)
    @param newId new identifier
    """
    for currentPath in lPaths:
        setattr(currentPath, "id", newId)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
56
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
57
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
58 ## Return a list of Set instances containing the query range from a list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
59 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
60 # @param lPaths a list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
61 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getSetListFromQueries(lPaths):
    """Collect the result of getSubjectAsSetOfQuery() for every Path instance.

    @param lPaths a list of Path instances
    @return a new list, in the same order as lPaths, holding what each
            Path's getSubjectAsSetOfQuery() returned (presumably Set
            instances on the query range -- confirm against Path)
    """
    return [ iPath.getSubjectAsSetOfQuery() for iPath in lPaths ]
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
68
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
69
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
70 ## Return a list of Set instances containing the subject range from a list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
71 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
72 # @param lPaths a list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
73 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getSetListFromSubjects(lPaths):
    """Collect the result of getQuerySetOfSubject() for every Path instance.

    @param lPaths a list of Path instances
    @return a new list, in the same order as lPaths, holding what each
            Path's getQuerySetOfSubject() returned (presumably Set
            instances on the subject range -- confirm against Path)
    """
    return [ iPath.getQuerySetOfSubject() for iPath in lPaths ]
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
80
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
81
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
82 ## Return a sorted list of Range instances containing the subjects from a list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
83 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
84 # @param lPaths a list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
85 # @note meaningful only if all Path instances have same identifier
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
86 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getRangeListFromSubjects( lPaths ):
    """Return the subject ranges of the given Path instances as a sorted list.

    The strand of the first subject range selects the sort key for the whole
    list: (min, max) when it is on the direct strand, (max, min) otherwise.

    @param lPaths a list of Path instances
    @return a new sorted list of the 'range_subject' objects of lPaths;
            an empty list when lPaths is empty
    @note meaningful only if all Path instances have same identifier
    """
    lRanges = [ iPath.range_subject for iPath in lPaths ]
    if not lRanges:
        # No first element to read the strand from: nothing to sort.
        return []
    if lRanges[0].isOnDirectStrand():
        sortKey = lambda iRange: ( iRange.getMin(), iRange.getMax() )
    else:
        sortKey = lambda iRange: ( iRange.getMax(), iRange.getMin() )
    return sorted( lRanges, key=sortKey )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
96
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
97
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
98 ## Return a tuple with min and max of query coordinates from Path instances in the given list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
99 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
100 # @param lPaths a list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
101 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getQueryMinMaxFromPathList(lPaths):
    """Compute the extreme query coordinates over a list of Path instances.

    @param lPaths a list of Path instances
    @return tuple (min, max) of the query coordinates; (-1, -1) when lPaths is empty
    """
    lowest, highest = -1, -1
    for iPath in lPaths:
        queryRange = iPath.range_query
        # -1 marks "no value yet": seed the minimum from the range start.
        if lowest == -1:
            lowest = queryRange.start
        lowest = min(lowest, queryRange.getMin())
        highest = max(highest, queryRange.getMax())
    return (lowest, highest)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
112
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
113
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
114 ## Return a tuple with min and max of subject coordinates from Path instances in the given list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
115 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
116 # @param lPaths lists of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
117 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getSubjectMinMaxFromPathList(lPaths):
    """Compute the extreme subject coordinates over a list of Path instances.

    @param lPaths a list of Path instances
    @return tuple (min, max) of the subject coordinates; (-1, -1) when lPaths is empty
    """
    lowest, highest = -1, -1
    for iPath in lPaths:
        subjectRange = iPath.range_subject
        # -1 marks "no value yet": seed the minimum from the range start.
        if lowest == -1:
            lowest = subjectRange.start
        lowest = min(lowest, subjectRange.getMin())
        highest = max(highest, subjectRange.getMax())
    return (lowest, highest)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
128
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
129
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
130 ## Returns a Path objects list where Paths query coordinates overlapping with
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
131 # any Path in a list are removed.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
132 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
133 # WARNING: input Path lists are modified (sort)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
134 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
135 # @param lRefPaths list of paths to check overlaps
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
136 # @param lPathsToClean list of paths to remove overlapping Paths on query coordinates
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
137 # @return path list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def removeOverlappingPathsOnQueriesBetweenPathLists(lRefPaths, lPathsToClean):
    """Filter lPathsToClean, dropping what overlaps lRefPaths on query coordinates.

    Paths are grouped by query name, then by identifier. A whole identifier
    group of lPathsToClean is discarded as soon as one of its Paths overlaps,
    on the query, a Path of lRefPaths located on the same query. Paths whose
    query name does not appear in lRefPaths are all kept.

    WARNING: both input lists are sorted in place.

    @param lRefPaths list of paths to check overlaps
    @param lPathsToClean list of paths to remove overlapping Paths on query coordinates
    @return list of the kept Path instances; lPathsToClean itself (unsorted,
            unfiltered) when lRefPaths is empty
    """
    if not lRefPaths:
        # Parenthesised form emits the same text under Python 2 and Python 3.
        print("WARNING: empty reference Paths list")
        return lPathsToClean

    lRefQueries = PathUtils.getListOfDistinctQueryNames(lRefPaths)
    lToCleanQueries = PathUtils.getListOfDistinctQueryNames(lPathsToClean)

    lCommonQueries = sorted(set(lRefQueries) & set(lToCleanQueries))
    # Computed once and kept sorted; it used to be recomputed unsorted just
    # before use, which discarded the sort.
    # NOTE(review): assumes extractPathsFromQueryNameList follows the order of
    # the names it is given -- confirm against that helper.
    lSpecificToCleanQueries = sorted(set(lToCleanQueries) - set(lCommonQueries))

    sortKey = lambda iPath: (iPath.getQueryName(), iPath.getIdentifier(), iPath.getQueryMin(), iPath.getQueryMax())
    lRefPaths.sort(key=sortKey)
    lPathsToClean.sort(key=sortKey)

    # Queries absent from the reference cannot overlap it: keep all their Paths.
    lCleanedPaths = []
    lCleanedPaths.extend(PathUtils.extractPathsFromQueryNameList(lPathsToClean, lSpecificToCleanQueries))

    dRefQueryToPathList = PathUtils.getDictOfListsWithQueryNameAsKey(lRefPaths)
    dToCleanQueryToPathList = PathUtils.getDictOfListsWithQueryNameAsKey(lPathsToClean)

    for queryName in lCommonQueries:
        refQueryHash = PathUtils.getDictOfListsWithIdAsKey(dRefQueryToPathList[queryName])
        toCleanQueryHash = PathUtils.getDictOfListsWithIdAsKey(dToCleanQueryToPathList[queryName])

        for lCleanPathById in toCleanQueryHash.values():
            # any() stops at the first overlapping reference group.
            isOverlapping = any(
                PathUtils.areQueriesOverlappingBetweenPathLists(lRefPathById, lCleanPathById, areListsAlreadySort = True)
                for lRefPathById in refQueryHash.values()
            )
            if not isOverlapping:
                lCleanedPaths.extend(lCleanPathById)

    return lCleanedPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
179
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
180
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
181 ## Return True if the query range of any Path instance from the first list overlaps with the query range of any Path instance from the second list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
182 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
183 # @param lPaths1: list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
184 # @param lPaths2: list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
185 # @return boolean
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
186 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def areQueriesOverlappingBetweenPathLists( lPaths1, lPaths2, areListsAlreadySort = False):
    """Tell whether any Path of lPaths1 overlaps, on the query, any Path of lPaths2.

    Every pair (one Path from each list) is tested with
    range_query.isOverlapping() until a first overlap is found.

    @param lPaths1: list of Path instances
    @param lPaths2: list of Path instances
    @param areListsAlreadySort: if False, both lists are first passed through
           getPathListSortedByIncreasingMinQueryThenMaxQuery(); if True they are used as given
    @return boolean
    """
    if areListsAlreadySort:
        lFirst, lSecond = lPaths1, lPaths2
    else:
        lFirst = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery( lPaths1 )
        lSecond = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery( lPaths2 )
    for iPath1 in lFirst:
        for iPath2 in lSecond:
            if iPath1.range_query.isOverlapping( iPath2.range_query ):
                return True
    return False
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
206
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
207
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
208 ## Show Path instances contained in the given list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
209 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
210 # @param lPaths a list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
211 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def showList(lPaths):
    """Call show() on each Path instance of lPaths, in list order.

    @param lPaths a list of Path instances
    """
    for currentPath in lPaths:
        currentPath.show()
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
216
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
217
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
218 ## Write Path instances contained in the given list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
219 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
220 # @param lPaths a list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
221 # @param fileName name of the file to write the Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
222 # @param mode the open mode of the file: "w" (write) or "a" (append)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
223 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def writeListInFile(lPaths, fileName, mode="w"):
    """Write the Path instances of lPaths by delegating to AlignUtils.writeListInFile.

    @param lPaths a list of Path instances
    @param fileName name of the file to write the Path instances
    @param mode open mode, forwarded unchanged to AlignUtils (default "w")
    """
    writeAlignList = AlignUtils.writeListInFile
    writeAlignList(lPaths, fileName, mode)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
227
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
228
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
229 ## Return new list of Path instances with no duplicate
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
230 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
231 # @param lPaths a list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
232 # @param useOnlyCoord boolean if True, check only coordinates and sequence names
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
233 # @return lUniqPaths a path instances list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
234 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getPathListWithoutDuplicates(lPaths, useOnlyCoord = False):
    """Return a new list of Path instances in which consecutive duplicates are removed.

    The input is first ordered with
    getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier(); each
    Path is then compared with the last Path kept so far.

    @param lPaths a list of Path instances
    @param useOnlyCoord boolean if True, check only coordinates and sequence names
           (query and subject start, end and seqname); if False, compare whole
           Path instances with '!='
    @return lUniqPaths a path instances list; always a list distinct from lPaths
    """
    if len(lPaths) < 2:
        # Hand back a copy so the result is a new list here as well, as it is
        # for longer inputs: callers may modify it without touching lPaths.
        return list(lPaths)

    def differOnCoord(iPath1, iPath2):
        # True as soon as one query/subject coordinate or sequence name differs.
        return iPath1.range_query.start != iPath2.range_query.start \
            or iPath1.range_query.end != iPath2.range_query.end \
            or iPath1.range_query.seqname != iPath2.range_query.seqname \
            or iPath1.range_subject.start != iPath2.range_subject.start \
            or iPath1.range_subject.end != iPath2.range_subject.end \
            or iPath1.range_subject.seqname != iPath2.range_subject.seqname

    lSortedPaths = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier( lPaths )
    lUniqPaths = [ lSortedPaths[0] ]
    # NOTE(review): only adjacent Paths are compared, so duplicates separated
    # by another Path in the sorted order are kept -- confirm this is intended.
    for iPath in lSortedPaths[1:]:
        if useOnlyCoord:
            isDifferent = differOnCoord( iPath, lUniqPaths[-1] )
        else:
            isDifferent = iPath != lUniqPaths[-1]
        if isDifferent:
            lUniqPaths.append( iPath )
    return lUniqPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
255
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
256
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getPathListWithoutDuplicatesOnQueryCoord(lPaths):
    """Return a new list of Path instances without consecutive duplicates on the query.

    The input is first ordered with
    getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier(); a Path
    is kept when its query start, end or seqname differs from those of the
    last Path kept so far.

    @param lPaths a list of Path instances
    @return a path instances list; always a list distinct from lPaths
    """
    if len(lPaths) < 2:
        # Hand back a copy so the result is a new list here as well, as it is
        # for longer inputs: callers may modify it without touching lPaths.
        return list(lPaths)
    lSortedPaths = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier( lPaths )
    lUniqPaths = [ lSortedPaths[0] ]
    for iPath in lSortedPaths[1:]:
        lastQuery = lUniqPaths[-1].range_query
        if iPath.range_query.start != lastQuery.start \
        or iPath.range_query.end != lastQuery.end \
        or iPath.range_query.seqname != lastQuery.seqname:
            lUniqPaths.append( iPath )
    return lUniqPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
269
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
270
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
271 ## Split a Path list in several Path lists according to the identifier
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
272 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
273 # @param lPaths a list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
274 # @return a dictionary which keys are identifiers and values Path lists
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
275 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getDictOfListsWithIdAsKey(lPaths):
    """Group the Path instances of lPaths by their 'id' attribute.

    @param lPaths a list of Path instances
    @return a dictionary which keys are identifiers and values Path lists
    """
    # One empty bucket per identifier reported by getListOfDistinctIdentifiers().
    dId2PathList = {}
    for identifier in PathUtils.getListOfDistinctIdentifiers(lPaths):
        dId2PathList[identifier] = []
    for iPath in lPaths:
        lBucket = dId2PathList[iPath.id]
        lBucket.append(iPath)
    return dId2PathList
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
282
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
283
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
284 ## Split a Path list in several Path lists according to the query name
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
285 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
286 # @param lPaths a list of Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
287 # @return a dictionary which keys are query_names and values Path lists
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
288 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getDictOfListsWithQueryNameAsKey(lPaths):
    """Group the Path instances of lPaths by the value of getQueryName().

    @param lPaths a list of Path instances
    @return a dictionary which keys are query_names and values Path lists
    """
    # One empty bucket per name reported by getListOfDistinctQueryNames().
    dQueryName2PathList = {}
    for queryName in PathUtils.getListOfDistinctQueryNames(lPaths):
        dQueryName2PathList[queryName] = []
    for iPath in lPaths:
        lBucket = dQueryName2PathList[iPath.getQueryName()]
        lBucket.append(iPath)
    return dQueryName2PathList
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
295
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
296
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
297 ## Split a Path file in several Path lists according to the identifier
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
298 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
299 # @param pathFile name of the input Path file
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
300 # @return a dictionary which keys are identifiers and values Path lists
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
301 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getDictOfListsWithIdAsKeyFromFile( pathFile ):
    """Read a Path file and group the parsed Path instances by identifier.

    Each line of the file is parsed with Path.setFromString() and the
    resulting instance is stored under its 'id' attribute, in file order.

    @param pathFile name of the input Path file
    @return a dictionary which keys are identifiers and values Path lists
            (empty when the file has no line)
    """
    dId2PathList = {}
    # 'with' closes the file even if parsing a line raises.
    with open(pathFile, "r") as pathFileHandler:
        for line in pathFileHandler:
            iPath = Path()
            iPath.setFromString(line)
            # setdefault replaces dict.has_key(), which no longer exists in Python 3.
            dId2PathList.setdefault(iPath.id, []).append(iPath)
    return dId2PathList
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
315
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
316
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
317 ## Return a list of Path list(s) obtained while splitting a list of connected Path instances according to another based on query coordinates
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
318 # Only the path instance of lToKeep between path instance of lToUnjoin are used to split lToUnjoin
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
319 # @param lToKeep: a list of Path instances to keep (reference)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
320 # @param lToUnjoin: a list of Path instances to unjoin
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
321 # @return: list of Path list(s) (can be empty if one of the input lists is empty)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
322 # @warning: all the path instances in a given list MUST be connected (i.e. same identifier)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
323 # @warning: if the path instances in a given list overlap neither within each other nor with the Path instances of the other list, these path instances are not used to split the lToUnjoin
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
324 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
@staticmethod
def getPathListUnjoinedBasedOnQuery( lToKeep, lToUnjoin ):
    # Split the Paths of lToUnjoin into sub-lists, cutting wherever a Path of
    # lToKeep lies (on the query) between two consecutive Paths to unjoin.
    if lToUnjoin == []:
        return []
    if lToKeep == []:
        return [ lToUnjoin ]

    # same ordering as getPathListSortedByIncreasingMinQueryThenMaxQuery();
    # 'sorted' returns new lists, so the caller's lists are never modified
    sortKey = lambda iPath: ( iPath.getQueryMin(), iPath.getQueryMax() )
    lSortedToKeep = sorted( lToKeep, key=sortKey )
    lSortedToUnjoin = sorted( lToUnjoin, key=sortKey )
    nbToKeep = len( lSortedToKeep )

    lLists = []
    k = 0
    while k < nbToKeep:
        # leading Paths to unjoin that end before the k-th Path to keep starts;
        # the list shrinks as groups are cut off, hence the live len() here
        minQueryOfToKeepK = lSortedToKeep[k].range_query.getMin()
        j1 = 0
        while j1 < len(lSortedToUnjoin) and minQueryOfToKeepK > lSortedToUnjoin[j1].range_query.getMax():
            j1 += 1
        if j1 == len(lSortedToUnjoin):
            # every remaining Path to unjoin lies before the k-th Path to keep
            break
        if j1 != 0:
            lLists.append( lSortedToUnjoin[:j1] )
            del lSortedToUnjoin[:j1]
        if k + 1 == nbToKeep:
            break
        # leading Paths to unjoin that end before the (k+1)-th Path to keep starts;
        # the bound is tested BEFORE indexing so that consuming the whole list
        # no longer raises an IndexError
        minQueryOfToKeepKplus1 = lSortedToKeep[k+1].range_query.getMin()
        j2 = 0
        while j2 < len(lSortedToUnjoin) and minQueryOfToKeepKplus1 > lSortedToUnjoin[j2].range_query.getMax():
            j2 += 1
        if j2 != 0:
            lLists.append( lSortedToUnjoin[:j2] )
            del lSortedToUnjoin[:j2]
        k += 1

    if lLists == [] and k != 0:
        # NOTE(review): historical behaviour kept as is -- when nothing was split
        # and more than one Path to keep was examined, the sorted Paths are
        # returned as a FLAT list rather than as a list of lists; confirm with callers
        return lSortedToUnjoin
    if lSortedToUnjoin != []:
        # what is left forms the last group (never an empty one)
        lLists.append( lSortedToUnjoin )
    return lLists
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
369
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
370
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return the identity of a list of Path instances, the identity of each merged Path being weighted by its length on the query
#
# All Paths should have the same query and, unless checkSubjects is False, the same subject.
# The Paths are merged using query coordinates only.
#
# @param lPaths list of Path instances
# @param checkSubjects boolean, if True (default) refuse Paths having different subject names
# @return the weighted identity (float), between 0 and 100
# @note raise an Exception if the Paths have several queries (or several subjects when checked) or if the result is negative, and a RepetDataException if the result is above 100; the message is also written on stderr
#
@staticmethod
def getIdentityFromPathList( lPaths, checkSubjects=True ):
    if len( PathUtils.getListOfDistinctQueryNames( lPaths ) ) > 1:
        msg = "ERROR: try to compute identity from Paths with different queries"
        sys.stderr.write( "%s\n" % msg )
        sys.stderr.flush()
        # same exception type as before, but it now carries the message
        raise Exception( msg )
    if checkSubjects and len( PathUtils.getListOfDistinctSubjectNames( lPaths ) ) > 1:
        msg = "ERROR: try to compute identity from Paths with different subjects"
        sys.stderr.write( "%s\n" % msg )
        sys.stderr.flush()
        raise Exception( msg )

    lMergedPaths = PathUtils.mergePathsInListUsingQueryCoordsOnly( lPaths )
    # denominator: cumulative length of the merged query ranges
    lQuerySets = PathUtils.getSetListFromQueries( lMergedPaths )
    lMergedQuerySets = SetUtils.mergeSetsInList( lQuerySets )
    totalLengthOnQry = SetUtils.getCumulLength( lMergedQuerySets )
    # numerator: sum of (identity x length on query) over the merged Paths
    identity = 0
    for iPath in lMergedPaths:
        identity += iPath.identity * iPath.getLengthOnQuery()
    # NOTE(review): an empty lPaths presumably gives a total length of 0, hence a ZeroDivisionError here -- confirm
    weightedIdentity = identity / float(totalLengthOnQry)

    if weightedIdentity < 0:
        msg = "ERROR: weighted identity '%.2f' outside range" % weightedIdentity
        sys.stderr.write("%s\n" % msg)
        sys.stderr.flush()
        raise Exception( msg )
    elif weightedIdentity > 100:
        msg = "ERROR: weighted identity '%.2f' outside range" % weightedIdentity
        sys.stderr.write("%s\n" % msg)
        sys.stderr.flush()
        raise RepetDataException(msg)
    return weightedIdentity
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
408
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
409
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return a new list with the given Path instances ordered by increasing query min, then by increasing query max; ties keep their initial relative order.
#
# @param lPaths list of Path instances
# @return a new sorted list (the input list is left untouched)
#
@staticmethod
def getPathListSortedByIncreasingMinQueryThenMaxQuery(lPaths):
    def queryBounds( iPath ):
        return ( iPath.getQueryMin(), iPath.getQueryMax() )
    lSortedPaths = list( lPaths )
    # list.sort() is stable, hence the "initial order" tie-break
    lSortedPaths.sort( key=queryBounds )
    return lSortedPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
417
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
418
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return a new list with the given Path instances ordered by increasing query min, then query max, then identifier; ties keep their initial relative order.
#
# @param lPaths list of Path instances
# @return a new sorted list (the input list is left untouched)
#
@staticmethod
def getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier(lPaths):
    def queryBoundsThenId( iPath ):
        return ( iPath.getQueryMin(), iPath.getQueryMax(), iPath.getIdentifier() )
    lSortedPaths = list( lPaths )
    # list.sort() is stable, hence the "initial order" tie-break
    lSortedPaths.sort( key=queryBoundsThenId )
    return lSortedPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
426
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
427
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return a new list with the given Path instances ordered by increasing query min, then query max, then subject min, then subject max; ties keep their initial relative order.
#
# @param lPaths list of Path instances
# @return a new sorted list (the input list is left untouched)
#
@staticmethod
def getPathListSortedByIncreasingMinQueryThenMaxQueryThenMinSubjectThenMaxSubject(lPaths):
    def queryThenSubjectBounds( iPath ):
        return ( iPath.getQueryMin(), iPath.getQueryMax(), iPath.getSubjectMin(), iPath.getSubjectMax() )
    lSortedPaths = list( lPaths )
    # list.sort() is stable, hence the "initial order" tie-break
    lSortedPaths.sort( key=queryThenSubjectBounds )
    return lSortedPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
435
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
436
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return a new list with the given Path instances ordered by increasing query min, then by increasing inverse of their length on the query (i.e. longest first); ties keep their initial relative order.
#
# @param lPaths: list of Path instances
# @return a new sorted list (the input list is left untouched)
#
@staticmethod
def getPathListSortedByIncreasingQueryMinThenInvQueryLength( lPaths ):
    def queryMinThenInvLength( iPath ):
        invLength = 1 / float(iPath.getLengthOnQuery())
        return ( iPath.getQueryMin(), invLength )
    lSortedPaths = list( lPaths )
    # list.sort() is stable, hence the "initial order" tie-break
    lSortedPaths.sort( key=queryMinThenInvLength )
    return lSortedPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
444
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
445
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return the distinct identifiers (attribute 'id') found among the given Path instances
#
# @param lPaths list of Path instances
# @return a list of identifiers without duplicates, in no particular order
#
@staticmethod
def getListOfDistinctIdentifiers( lPaths ):
    sIdentifiers = set()
    for iPath in lPaths:
        sIdentifiers.add( iPath.id )
    return list( sIdentifiers )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
454
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
455
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return the distinct query names (range_query.seqname) found among the given Path instances
#
# @param lPaths list of Path instances
# @return a list of query names without duplicates, in no particular order
#
@staticmethod
def getListOfDistinctQueryNames( lPaths ):
    sQueryNames = set()
    for iPath in lPaths:
        sQueryNames.add( iPath.range_query.seqname )
    return list( sQueryNames )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
464
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
465
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return the distinct subject names (range_subject.seqname) found among the given Path instances
#
# @param lPaths list of Path instances
# @return a list of subject names without duplicates, in no particular order
#
@staticmethod
def getListOfDistinctSubjectNames( lPaths ):
    sSubjectNames = set()
    for iPath in lPaths:
        sSubjectNames.add( iPath.range_subject.seqname )
    return list( sSubjectNames )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
474
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
475
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return the Path instances whose query name equals the given name, in their initial order
#
# @param lPaths list of Path instances
# @param queryName query name to extract
# @return list of Path instances
#
@staticmethod
def extractPathsFromQueryName(lPaths, queryName):
    lExtractedPaths = []
    for iPath in lPaths:
        if iPath.getQueryName() == queryName:
            lExtractedPaths.append( iPath )
    return lExtractedPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
483
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
484
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return the Path instances whose query name belongs to the given list of names, in their initial order
#
# @param lPaths list of Path instances
# @param lQueryNames list of query names to extract
# @return list of Path instances
#
@staticmethod
def extractPathsFromQueryNameList(lPaths, lQueryNames):
    # hashed container built once, so that each membership test is constant-time
    sWantedNames = set( lQueryNames )
    lExtractedPaths = []
    for iPath in lPaths:
        if iPath.getQueryName() in sWantedNames:
            lExtractedPaths.append( iPath )
    return lExtractedPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
493
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
494
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return the Path instances whose subject name equals the given name, in their initial order
#
# @param lPaths list of Path instances
# @param subjectName subject name to extract
# @return list of Path instances
#
@staticmethod
def extractPathsFromSubjectName(lPaths, subjectName):
    lExtractedPaths = []
    for iPath in lPaths:
        if iPath.getSubjectName() == subjectName:
            lExtractedPaths.append( iPath )
    return lExtractedPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
502
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
503
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return the Path instances of a given query whose query coordinates overlap a given range
#
# @param lPaths list of Path instances
# @param queryName query name to extract
# @param start starting position of the range on the query
# @param end ending position of the range on the query
# @return list of Path instances, in their initial order
#
@staticmethod
def extractPathsFromQueryCoord(lPaths, queryName, start, end):
    # the requested range is wrapped in an Align so that Path.isQueryOverlapping() can be used
    iAlign = Align(range_q = Range(queryName, start, end))
    lSameQueryPaths = PathUtils.extractPathsFromQueryName(lPaths, queryName)
    return [ iPath for iPath in lSameQueryPaths if iPath.isQueryOverlapping(iAlign) ]
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
521
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
522
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return a list of [start, end] lists giving the query coordinates of the connections (gaps) between consecutive Paths.
#
# @param lConnectedPaths: list of Path instances having the same identifier
# @param minLength: threshold used to filter the connections (default= 0 bp)
# @return list of [ previous end + 1, next start - 1 ] lists, in the order of the input list
# @note: a connection is reported only when (next start - previous end + 1) is strictly above the threshold
# @note: if coordinate on query ends at 100, return 101
# @warning: Path instances MUST be sorted in increasing order according to query coordinates
# @warning: Path instances MUST be on direct query strand (and maybe on reverse subject strand)
#
@staticmethod
def getListOfJoinCoordinatesOnQuery(lConnectedPaths, minLength=0):
    lJoinCoordinates = []
    # walk through each (previous, next) pair of consecutive Paths
    for iPrevPath, iNextPath in zip( lConnectedPaths, lConnectedPaths[1:] ):
        startJoin = iPrevPath.range_query.end
        endJoin = iNextPath.range_query.start
        if endJoin - startJoin + 1 <= minLength:
            continue
        lJoinCoordinates.append( [ startJoin + 1, endJoin - 1 ] )
    return lJoinCoordinates
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
541
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
542
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return the cumulative length on the query of all the Path instances in the given list
#
# @param lPaths list of Path instances
# @return the cumulative length computed by SetUtils.getCumulLength() on the merged query Sets
# @note overlapping ranges are not summed but truncated (the query Sets are merged before their lengths are added up).
#
@staticmethod
def getLengthOnQueryFromPathList( lPaths ):
    lMergedQuerySets = SetUtils.mergeSetsInList( PathUtils.getSetListFromQueries( lPaths ) )
    return SetUtils.getCumulLength( lMergedQuerySets )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
554
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
555
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Convert a Path file into an Align file
#
# Each line of the Path file is parsed into a Path, converted into an Align and written in the Align file.
#
# @param pathFile: name of the input Path file
# @param alignFile: name of the output Align file (overwritten if it exists)
#
@staticmethod
def convertPathFileIntoAlignFile(pathFile, alignFile):
    # 'with' closes both handlers even if a line cannot be parsed or written;
    # the input is opened first, as before, so a missing Path file does not create the output
    with open(pathFile, "r") as pathFileHandler:
        with open(alignFile, "w") as alignFileHandler:
            iPath = Path()
            for line in pathFileHandler:
                iPath.setFromString(line)
                iAlign = iPath.getAlignInstance()
                iAlign.write(alignFileHandler)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
572
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
573
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
#TODO: duplicated method => to rename with the name of the next method (which is called) ?
## Convert a Path File into a Map file with query coordinates only
#
# Each tab-delimited line of the Path file is parsed into a Path, which is written with writeSubjectAsMapOfQuery().
#
# @param pathFile: name of the input Path file
# @param mapFile: name of the output Map file (overwritten if it exists)
#
@staticmethod
def convertPathFileIntoMapFileWithQueryCoordsOnly( pathFile, mapFile ):
    # 'with' closes both handlers even if a line cannot be parsed or written;
    # the input is opened first, as before, so a missing Path file does not create the output
    with open(pathFile, "r") as pathFileHandler:
        with open(mapFile, "w") as mapFileHandler:
            p = Path()
            for line in pathFileHandler:
                p.reset()
                p.setFromTuple(line.split("\t"))
                p.writeSubjectAsMapOfQuery(mapFileHandler)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
591
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
592
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## For each line of a given Path file, write the coordinates of the subject on the query as one line in a Map file
#
# @param pathFile: name of the input Path file
# @param mapFile: name of the output Map file
# @note simple alias: the work is delegated to convertPathFileIntoMapFileWithQueryCoordsOnly()
#
@staticmethod
def convertPathFileIntoMapFileWithSubjectsOnQueries( pathFile, mapFile ):
    PathUtils.convertPathFileIntoMapFileWithQueryCoordsOnly( pathFile, mapFile )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
601
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
602
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Merge matches on queries
#
# The Path file is converted into a temporary Map file '<inFile>.map', the external program 'mapOp' is
# launched on it with option '-m', and its result '<inFile>.map.merge' is written as Paths in the output file.
# Both temporary files are removed when everything succeeds.
#
# @param inFile: name of the input Path file
# @param outFile: name of the output Path file
# @note the process exits with status 1 if mapOp returns a non-zero status (the temporary Map file is then left in place)
#
@staticmethod
def mergeMatchesOnQueries(inFile, outFile):
    mapFile = "%s.map" % inFile
    PathUtils.convertPathFileIntoMapFileWithQueryCoordsOnly(inFile, mapFile)
    # NOTE(review): the file name is pasted unquoted into a shell command; a name with spaces or shell metacharacters would break it
    cmd = "mapOp"
    cmd += " -q %s" % mapFile
    cmd += " -m"
    # NOTE(review): with this redirection order only stdout is discarded, stderr is still shown -- confirm this is intended
    cmd += " 2>&1 > /dev/null"
    exitStatus = os.system(cmd)
    if exitStatus != 0:
        # parenthesised single argument: prints the same under Python 2 and is also valid Python 3
        print("ERROR: mapOp returned %i" % exitStatus)
        sys.exit(1)
    os.remove(mapFile)
    mergeFile = "%s.merge" % mapFile
    # 'with' closes both handlers even if a merged line cannot be parsed or written
    with open(mergeFile, "r") as mergeFileHandler:
        with open(outFile, "w") as outFileHandler:
            m = Map()
            for line in mergeFileHandler:
                m.reset()
                m.setFromString(line, "\t")
                m.writeAsQueryOfPath(outFileHandler)
    os.remove(mergeFile)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
632
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
633
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Keep only the chains of Path(s) whose length on the query reaches a given threshold
#
# @param lPaths: list of Path instances
# @param minLengthChain: minimum length of a chain to be kept (a chain of exactly this length is kept)
# @note: a chain gathers the Path instances sharing the same identifier; it may contain a single Path instance
# @return: a list of Path instances, chain after chain
#
@staticmethod
def filterPathListOnChainLength( lPaths, minLengthChain ):
    lFilteredPaths = []
    for lChain in PathUtils.getDictOfListsWithIdAsKey( lPaths ).values():
        if PathUtils.getLengthOnQueryFromPathList( lChain ) >= minLengthChain:
            lFilteredPaths.extend( lChain )
    return lFilteredPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
650
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
651
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Return the list of Path instances described in a Path file, one Path per line
#
# @param pathFile string name of a Path file
# @return a list of Path instances, in the order of the file
#
@staticmethod
def getPathListFromFile( pathFile ):
    def parsePathLine( line ):
        # a fresh instance for each line, so that the returned Paths are independent
        iPath = Path()
        iPath.setFromString(line)
        return iPath
    with open(pathFile, "r") as pathFileHandler:
        return [ parsePathLine( line ) for line in pathFileHandler ]
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
666
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
667
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
668 ## Convert a chain into a 'pathrange'
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
669 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
670 # @param lPaths a list of Path instances with the same identifier
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
671 # @note: the min and max of each Path is used
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
672 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
673 @staticmethod
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
674 def convertPathListToPathrange( lPaths ):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
675 if len(lPaths) == 0:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
676 return
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
677 if len(lPaths) == 1:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
678 return lPaths[0]
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
679 iPathrange = copy.deepcopy( lPaths[0] )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
680 iPathrange.identity = lPaths[0].identity * lPaths[0].getLengthOnQuery()
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
681 cumulQueryLength = iPathrange.getLengthOnQuery()
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
682 for iPath in lPaths[1:]:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
683 if iPath.id != iPathrange.id:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
684 msg = "ERROR: two Path instances in the chain have different identifiers"
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
685 sys.stderr.write( "%s\n" % ( msg ) )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
686 sys.exit(1)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
687 if iPathrange.range_subject.isOnDirectStrand() != iPath.range_subject.isOnDirectStrand():
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
688 msg = "ERROR: two Path instances in the chain are on different strands"
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
689 sys.stderr.write( "%s\n" % ( msg ) )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
690 sys.exit(1)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
691 iPathrange.range_query.start = min( iPathrange.range_query.start, iPath.range_query.start )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
692 iPathrange.range_query.end = max( iPathrange.range_query.end, iPath.range_query.end )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
693 if iPathrange.range_subject.isOnDirectStrand():
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
694 iPathrange.range_subject.start = min( iPathrange.range_subject.start, iPath.range_subject.start )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
695 iPathrange.range_subject.end = max( iPathrange.range_subject.end, iPath.range_subject.end )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
696 else:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
697 iPathrange.range_subject.start = max( iPathrange.range_subject.start, iPath.range_subject.start )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
698 iPathrange.range_subject.end = min( iPathrange.range_subject.end, iPath.range_subject.end )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
699 iPathrange.e_value = min( iPathrange.e_value, iPath.e_value )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
700 iPathrange.score += iPath.score
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
701 iPathrange.identity += iPath.identity * iPath.getLengthOnQuery()
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
702 cumulQueryLength += iPath.getLengthOnQuery()
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
703 iPathrange.identity = iPathrange.identity / float(cumulQueryLength)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
704 return iPathrange
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
705
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
706
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Convert a Path file into an Align file via 'pathrange'
#
# The Path instances are grouped by identifier, each group is collapsed into
# one 'pathrange' and written as an Align instance, by increasing identifier.
#
# @param pathFile: name of the input Path file
# @param alignFile: name of the output Align file (opened in "w" mode)
# @param verbose integer verbosity level (the number of chains is printed on stdout if > 0)
# @note: the min and max of each Path is used
#
@staticmethod
def convertPathFileIntoAlignFileViaPathrange( pathFile, alignFile, verbose=0 ):
    lPaths = PathUtils.getPathListFromFile( pathFile )
    dId2PathList = PathUtils.getDictOfListsWithIdAsKey( lPaths )
    # sorted() works whether keys() returns a list or a view without a sort() method
    lIds = sorted( dId2PathList.keys() )
    if verbose > 0:
        msg = "number of chains: %i" % ( len(lIds) )
        sys.stdout.write( "%s\n" % ( msg ) )
        sys.stdout.flush()
    # the context manager closes the output file even if a conversion raises
    with open( alignFile, "w" ) as alignFileHandler:
        for identifier in lIds:
            iPath = PathUtils.convertPathListToPathrange( dId2PathList[ identifier ] )
            iAlign = iPath.getAlignInstance()
            iAlign.write( alignFileHandler )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
730
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
731
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
732 ## Split a list of Path instances according to the name of the query
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
733 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
734 # @param lInPaths list of align instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
735 # @return lOutPathLists list of align instances lists
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
736 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
737 @staticmethod
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
738 def splitPathListByQueryName( lInPaths ):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
739 lInSortedPaths = sorted( lInPaths, key=lambda o: o.range_query.seqname )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
740 lOutPathLists = []
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
741 if len(lInSortedPaths) != 0 :
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
742 lPathsForCurrentQuery = []
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
743 previousQuery = lInSortedPaths[0].range_query.seqname
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
744 for iPath in lInSortedPaths :
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
745 currentQuery = iPath.range_query.seqname
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
746 if previousQuery != currentQuery :
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
747 lOutPathLists.append( lPathsForCurrentQuery )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
748 previousQuery = currentQuery
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
749 lPathsForCurrentQuery = []
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
750 lPathsForCurrentQuery.append( iPath )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
751
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
752 lOutPathLists.append(lPathsForCurrentQuery)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
753
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
754 return lOutPathLists
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
755
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
756
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
757 ## Create an Path file from each list of Path instances in the input list
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
758 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
759 # @param lPathList list of lists with Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
760 # @param pattern string
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
761 # @param dirName string
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
762 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
763 @staticmethod
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
764 def createPathFiles( lPathList, pattern, dirName="" ):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
765 nbFiles = len(lPathList)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
766 countFile = 1
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
767 if dirName != "" :
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
768 if dirName[-1] != "/":
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
769 dirName = dirName + '/'
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
770 os.mkdir( dirName )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
771
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
772 for lPath in lPathList:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
773 fileName = dirName + pattern + "_%s.path" % ( str(countFile).zfill( len(str(nbFiles)) ) )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
774 PathUtils.writeListInFile( lPath, fileName )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
775 countFile += 1
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
776
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
777
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
778 ## Merge all overlapping Path instances in a list without considering the identifiers
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
779 # Start by sorting the Path instances by their increasing min coordinate
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
780 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
781 # @return: a new list with the merged Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
782 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
783 @staticmethod
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
784 def mergePathsInList( lPaths ):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
785 lMergedPaths = []
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
786 if len(lPaths)==0:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
787 return lMergedPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
788
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
789 lSortedPaths = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength( lPaths )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
790
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
791 prev_count = 0
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
792 for iPath in lSortedPaths[0:]:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
793 if prev_count != len(lSortedPaths):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
794 for i in lSortedPaths[ prev_count + 1: ]:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
795 if iPath.isOverlapping( i ):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
796 iPath.merge( i )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
797 isAlreadyInList = False
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
798 for newPath in lMergedPaths:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
799 if newPath.isOverlapping( iPath ):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
800 isAlreadyInList = True
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
801 newPath.merge( iPath )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
802 lMergedPaths [ lMergedPaths.index( newPath ) ] = newPath
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
803 if not isAlreadyInList:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
804 lMergedPaths.append( iPath )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
805 prev_count += 1
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
806 return lMergedPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
807
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
808
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
809 ## Merge all overlapping Path instances in a list without considering if subjects are overlapping.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
810 # Start by sorting the Path instances by their increasing min coordinate.
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
811 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
812 # @return: a new list with the merged Path instances
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
813 #
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
814 @staticmethod
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
815 def mergePathsInListUsingQueryCoordsOnly( lPaths ):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
816 lMergedPaths = []
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
817 if len(lPaths)==0:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
818 return lMergedPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
819
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
820 lSortedPaths = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength( lPaths )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
821
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
822 prev_count = 0
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
823 for iPath in lSortedPaths[0:]:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
824 if prev_count != len(lSortedPaths):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
825 for i in lSortedPaths[ prev_count + 1: ]:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
826 if iPath.isQueryOverlapping( i ):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
827 iPath.merge( i )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
828 isAlreadyInList = False
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
829 for newPath in lMergedPaths:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
830 if newPath.isQueryOverlapping( iPath ):
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
831 isAlreadyInList = True
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
832 newPath.merge( iPath )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
833 lMergedPaths [ lMergedPaths.index( newPath ) ] = newPath
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
834 if not isAlreadyInList:
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
835 lMergedPaths.append( iPath )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
836 prev_count += 1
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
837 return lMergedPaths
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
838
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
839
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Convert a Path file into a GFF file
#
# A chain made of a single Path gives one GFF line whose ID is the Path
# identifier. A chain of several Paths gives one line for the whole
# 'pathrange' (ID "ms<id>") followed by one "match_part" line per Path
# (ID "mp<id>-<rank>", Parent "ms<id>").
#
# @param pathFile: name of the input Path file
# @param gffFile: name of the output GFF file (opened in "w" mode)
# @param source: source to write in the GFF file (column 2)
# @param verbose integer verbosity level (the number of chains is printed on stdout if > 0)
#
# @note the 'path' query is supposed to correspond to the 'gff' first column
#
@staticmethod
def convertPathFileIntoGffFile( pathFile, gffFile, source="REPET", verbose=0 ):
    dId2PathList = PathUtils.getDictOfListsWithIdAsKeyFromFile( pathFile )
    if verbose > 0:
        msg = "number of chains: %i" % ( len(dId2PathList.keys()) )
        sys.stdout.write( "%s\n" % msg )
        sys.stdout.flush()
    # the context manager closes the output file even if a conversion raises
    with open( gffFile, "w" ) as gffFileHandler:
        # 'identifier' rather than 'id' to avoid shadowing the builtin
        for identifier in dId2PathList.keys():
            lChain = dId2PathList[ identifier ]
            if len( lChain ) == 1:
                iPath = lChain[0]
                string = iPath.toStringAsGff( ID="%i" % iPath.getIdentifier(),
                                              source=source )
                gffFileHandler.write( "%s\n" % string )
            else:
                iPathrange = PathUtils.convertPathListToPathrange( lChain )
                string = iPathrange.toStringAsGff( ID="ms%i" % iPathrange.getIdentifier(),
                                                   source=source )
                gffFileHandler.write( "%s\n" % string )
                # match_part ranks start at 1
                for count, iPath in enumerate( lChain, 1 ):
                    string = iPath.toStringAsGff( type="match_part",
                                                  ID="mp%i-%i" % ( iPath.getIdentifier(), count ),
                                                  Parent="ms%i" % iPathrange.getIdentifier(),
                                                  source=source )
                    gffFileHandler.write( "%s\n" % string )
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
876
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
877
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Convert a Path file into a Set file
# replace old parser.pathrange2set
#
# Each line of the input is parsed as a Path and the result of its
# getSubjectAsSetOfQuery() method is written to the output.
#
# @param pathFile: name of the input Path file
# @param setFile: name of the output Set file (opened in "w" mode)
#
@staticmethod
def convertPathFileIntoSetFile( pathFile, setFile ):
    # nested context managers close both files even if parsing or writing raises;
    # the input is opened first so that a missing input does not create the output
    with open(pathFile, "r") as pathFileHandler:
        with open(setFile, "w") as setFileHandler:
            # a single Path instance is re-filled from each line
            iPath = Path()
            for line in pathFileHandler:
                iPath.setFromString(line)
                iSet = iPath.getSubjectAsSetOfQuery()
                iSet.write(setFileHandler)
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
894
feef9a0db09d Uploaded
urgi-team
parents:
diff changeset
## Write Path File without duplicated Path (same query, same subject and same coordinate)
#
# Consecutive lines sharing the same query name and subject name are gathered
# into a batch; each batch is filtered with getPathListWithoutDuplicatesOnQueryCoord()
# and handed to writeListInFile() with "a" as third argument (presumably the
# append mode, so an existing output file would not be truncated -- confirm).
#
# @param inputFile: name of the input Path file
# @param outputFile: name of the output Path file
# @note: only consecutive lines are compared, so the input is presumably
#        expected to be sorted by query and subject names
#
@staticmethod
def removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName(inputFile, outputFile):
    previousQuery = ""
    previousSubject = ""
    lPaths = []
    # the context manager closes the input file even if parsing or writing raises
    with open(inputFile, "r") as f:
        for line in f:
            iPath = Path()
            iPath.setFromString(line)
            query = iPath.getQueryName()
            subject = iPath.getSubjectName()
            # a change of query or subject ends the current batch: flush it
            if (query != previousQuery or subject != previousSubject) and lPaths != []:
                lPathsWithoutDuplicate = PathUtils.getPathListWithoutDuplicatesOnQueryCoord(lPaths)
                PathUtils.writeListInFile(lPathsWithoutDuplicate, outputFile, "a")
                lPaths = []
            lPaths.append(iPath)
            previousQuery = query
            previousSubject = subject
        # flush the last batch (also done, with an empty list, when the input is empty)
        lPathsWithoutDuplicate = PathUtils.getPathListWithoutDuplicatesOnQueryCoord(lPaths)
        PathUtils.writeListInFile(lPathsWithoutDuplicate, outputFile, "a")