annotate commons/core/coord/PathUtils.py @ 56:97aa2e42bfdf

Uploaded
author m-zytnicki
date Wed, 05 Feb 2014 11:51:11 -0500
parents 769e306b7933
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 # Copyright INRA (Institut National de la Recherche Agronomique)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2 # http://www.inra.fr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3 # http://urgi.versailles.inra.fr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
4 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
9 # "http://www.cecill.info".
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
10 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
15 # liability.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
16 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
26 # same conditions as regards security.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
27 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
30
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
31
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
32 import os
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
33 import sys
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
34 import copy
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
35 from commons.core.coord.Path import Path
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
36 from commons.core.coord.SetUtils import SetUtils
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
37 from commons.core.coord.Map import Map
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
38 from commons.core.coord.AlignUtils import AlignUtils
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
39 from commons.core.checker.RepetException import RepetDataException
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
40
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
41 ## Static methods for the manipulation of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
42 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
43 class PathUtils ( object ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
44
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
45 ## Change the identifier of each Path instance in the given list
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
46 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
47 # @param lPaths list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
48 # @param newId new identifier
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
49 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def changeIdInList(lPaths, newId):
    """Give every Path instance of a list the same identifier.

    The instances are modified in place (their 'id' attribute is
    overwritten); nothing is returned.

    @param lPaths list of Path instances
    @param newId new identifier
    """
    for path in lPaths:
        path.id = newId
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
53
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
54 changeIdInList = staticmethod( changeIdInList )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
55
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
56
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
57 ## Return a list of Set instances containing the query range from a list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
58 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
59 # @param lPaths a list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
60 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getSetListFromQueries(lPaths):
    """Convert each Path of a list with its getSubjectAsSetOfQuery() method.

    @param lPaths a list of Path instances
    @return list of the converted objects, in the same order as the input
    """
    return [path.getSubjectAsSetOfQuery() for path in lPaths]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
66
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
67 getSetListFromQueries = staticmethod( getSetListFromQueries )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
68
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
69 #TODO: add tests !!!!
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
70 ## Return a list of Set instances obtained by calling getQuerySetOfSubject() on each Path instance of the list
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
71 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
72 # @param lPaths a list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
73 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
74 @staticmethod
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getSetListFromSubjects(lPaths):
    """Convert each Path of a list with its getQuerySetOfSubject() method.

    @param lPaths a list of Path instances
    @return list of the converted objects, in the same order as the input
    """
    return [path.getQuerySetOfSubject() for path in lPaths]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
80
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
81
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
82 ## Return a sorted list of Range instances containing the subjects from a list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
83 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
84 # @param lPaths a list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
85 # @note meaningful only if all Path instances have same identifier
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
86 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getRangeListFromSubjects( lPaths ):
    """Return the sorted subject ranges of a list of Path instances.

    The strand of the first subject range decides the ordering: when it is
    on the direct strand the ranges are sorted by (min, max), otherwise by
    (max, min).

    @param lPaths a list of Path instances
    @return sorted list of the 'range_subject' attributes; an empty list
            when lPaths is empty
    @note meaningful only if all Path instances have same identifier
    """
    lRanges = [ iPath.range_subject for iPath in lPaths ]
    # Nothing to sort, and no first range to read the strand from.
    if not lRanges:
        return []
    if lRanges[0].isOnDirectStrand():
        return sorted( lRanges, key=lambda iRange: ( iRange.getMin(), iRange.getMax() ) )
    return sorted( lRanges, key=lambda iRange: ( iRange.getMax(), iRange.getMin() ) )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
95
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
96 getRangeListFromSubjects = staticmethod( getRangeListFromSubjects )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
97
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
98
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
99 ## Return a tuple with min and max of query coordinates from Path instances in the given list
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
100 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
101 # @param lPaths a list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
102 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getQueryMinMaxFromPathList(lPaths):
    """Return the extreme query coordinates found in a list of Path instances.

    -1 is used as the "not yet set" marker for both bounds, so an empty
    list yields (-1, -1).

    @param lPaths a list of Path instances
    @return tuple (smallest query min, largest query max)
    """
    lowest, highest = -1, -1
    for path in lPaths:
        queryRange = path.range_query
        # Seed the lower bound from the start coordinate while it is unset.
        if lowest == -1:
            lowest = queryRange.start
        lowest = min(lowest, queryRange.getMin())
        highest = max(highest, queryRange.getMax())
    return (lowest, highest)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
112
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
113 getQueryMinMaxFromPathList = staticmethod( getQueryMinMaxFromPathList )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
114
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
115
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
116 ## Return a tuple with min and max of subject coordinates from Path instances in the given list
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
117 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
118 # @param lPaths a list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
119 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getSubjectMinMaxFromPathList(lPaths):
    """Return the extreme subject coordinates found in a list of Path instances.

    -1 is used as the "not yet set" marker for both bounds, so an empty
    list yields (-1, -1).

    @param lPaths a list of Path instances
    @return tuple (smallest subject min, largest subject max)
    """
    lowest, highest = -1, -1
    for path in lPaths:
        subjectRange = path.range_subject
        # Seed the lower bound from the start coordinate while it is unset.
        if lowest == -1:
            lowest = subjectRange.start
        lowest = min(lowest, subjectRange.getMin())
        highest = max(highest, subjectRange.getMax())
    return (lowest, highest)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
129
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
130 getSubjectMinMaxFromPathList = staticmethod( getSubjectMinMaxFromPathList )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
131
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
132
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
133 ## Return True if the query range of any Path instance from the first list overlaps with the query range of any Path instance from the second list
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
134 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
135 # @param lPaths1: list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
136 # @param lPaths2: list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
137 # @return boolean
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
138 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def areQueriesOverlappingBetweenPathLists( lPaths1, lPaths2 ):
    """Tell whether any query range of one list overlaps any of the other.

    Every Path of the first list is compared with every Path of the second
    one through range_query.isOverlapping(). The previous implementation
    never rewound its index on the second list, so only the first Path of
    lPaths1 was actually checked and later overlaps were missed. The answer
    does not depend on the order of the lists, so no pre-sorting is done.

    @param lPaths1: list of Path instances
    @param lPaths2: list of Path instances
    @return boolean True as soon as one overlapping pair is found, False
            otherwise (including when either list is empty)
    """
    for iPath1 in lPaths1:
        for iPath2 in lPaths2:
            if iPath1.range_query.isOverlapping( iPath2.range_query ):
                return True
    return False
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
152
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
153 areQueriesOverlappingBetweenPathLists = staticmethod( areQueriesOverlappingBetweenPathLists )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
154
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
155
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
156 ## Show Path instances contained in the given list
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
157 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
158 # @param lPaths a list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
159 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def showList(lPaths):
    """Call show() on every Path instance of a list, in order.

    @param lPaths a list of Path instances
    """
    for path in lPaths:
        path.show()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
163
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
164 showList = staticmethod( showList )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
165
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
166
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
167 ## Write Path instances contained in the given list
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
168 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
169 # @param lPaths a list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
170 # @param fileName name of the file to write the Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
171 # @param mode the open mode of the file: "w" or "a"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
172 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def writeListInFile(lPaths, fileName, mode="w"):
    """Write Path instances to a file by delegating to AlignUtils.

    All three arguments are forwarded unchanged to
    AlignUtils.writeListInFile(); nothing is returned.

    @param lPaths a list of Path instances
    @param fileName name of the file to write the Path instances
    @param mode the open mode of the file, "w" by default
    """
    AlignUtils.writeListInFile(lPaths, fileName, mode)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
175
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
176 writeListInFile = staticmethod( writeListInFile )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
177
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
178
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
179 ## Return new list of Path instances with no duplicate
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
180 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
181 # @param lPaths a list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
182 # @param useOnlyCoord boolean if True, check only coordinates and sequence names
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
183 # @return lUniqPaths a path instances list
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
184 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getPathListWithoutDuplicates(lPaths, useOnlyCoord = False):
    """Return a new list of Path instances with no duplicate.

    The paths are first sorted (min query, max query, identifier), then
    each path is compared with the last one kept, so only consecutive
    duplicates in that order are removed.

    A new list is always returned, including for inputs shorter than two
    elements (the input list itself used to be handed back in that case),
    so callers can modify the result without touching their input.

    @param lPaths a list of Path instances
    @param useOnlyCoord boolean if True, check only coordinates and sequence names
    @return lUniqPaths a path instances list
    """
    if len(lPaths) < 2:
        return list(lPaths)

    def coordKey(iPath):
        # Query and subject coordinates plus sequence names.
        query, subject = iPath.range_query, iPath.range_subject
        return ( query.start, query.end, query.seqname,
                 subject.start, subject.end, subject.seqname )

    lSortedPaths = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier( lPaths )
    lUniqPaths = [ lSortedPaths[0] ]
    for iPath in lSortedPaths[1:]:
        if useOnlyCoord:
            isDifferent = coordKey( iPath ) != coordKey( lUniqPaths[-1] )
        else:
            # Rely on the comparison defined by the Path class itself.
            isDifferent = iPath != lUniqPaths[-1]
        if isDifferent:
            lUniqPaths.append( iPath )
    return lUniqPaths
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
204
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
205 getPathListWithoutDuplicates = staticmethod( getPathListWithoutDuplicates )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
206
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
207
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getPathListWithoutDuplicatesOnQueryCoord(lPaths):
    """Return a new list of Path instances without duplicated query ranges.

    The paths are first sorted (min query, max query, identifier), then a
    path is dropped when its query start, end and sequence name all equal
    those of the last path kept.

    A new list is always returned, including for inputs shorter than two
    elements (the input list itself used to be handed back in that case).

    @param lPaths a list of Path instances
    @return lUniqPaths a path instances list
    """
    if len(lPaths) < 2:
        return list(lPaths)

    def queryKey(iPath):
        query = iPath.range_query
        return ( query.start, query.end, query.seqname )

    lSortedPaths = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier( lPaths )
    lUniqPaths = [ lSortedPaths[0] ]
    for iPath in lSortedPaths[1:]:
        if queryKey( iPath ) != queryKey( lUniqPaths[-1] ):
            lUniqPaths.append( iPath )
    return lUniqPaths
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
219
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
220 getPathListWithoutDuplicatesOnQueryCoord = staticmethod(getPathListWithoutDuplicatesOnQueryCoord)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
221
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
222
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
223 ## Split a Path list in several Path lists according to the identifier
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
224 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
225 # @param lPaths a list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
226 # @return a dictionary which keys are identifiers and values Path lists
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
227 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getDictOfListsWithIdAsKey( lPaths ):
    """Split a Path list in several Path lists according to the identifier.

    Paths keep their input order inside each group. dict.setdefault() is
    used instead of dict.has_key(), which no longer exists in Python 3.

    @param lPaths a list of Path instances
    @return a dictionary which keys are identifiers and values Path lists
    """
    dId2PathList = {}
    for iPath in lPaths:
        dId2PathList.setdefault( iPath.id, [] ).append( iPath )
    return dId2PathList
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
236
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
237 getDictOfListsWithIdAsKey = staticmethod( getDictOfListsWithIdAsKey )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
238
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
239
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
240 ## Split a Path file in several Path lists according to the identifier
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
241 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
242 # @param pathFile name of the input Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
243 # @return a dictionary which keys are identifiers and values Path lists
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
244 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getDictOfListsWithIdAsKeyFromFile( pathFile ):
    """Split a Path file in several Path lists according to the identifier.

    Each line of the file is parsed into a Path with Path.setFromString().
    The file is opened in a 'with' block so that it is closed even when
    parsing a line raises, and dict.setdefault() replaces dict.has_key(),
    which no longer exists in Python 3.

    @param pathFile name of the input Path file
    @return a dictionary which keys are identifiers and values Path lists
    """
    dId2PathList = {}
    with open( pathFile, "r" ) as pathFileHandler:
        for line in pathFileHandler:
            iPath = Path()
            iPath.setFromString( line )
            dId2PathList.setdefault( iPath.id, [] ).append( iPath )
    return dId2PathList
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
260
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
261 getDictOfListsWithIdAsKeyFromFile = staticmethod( getDictOfListsWithIdAsKeyFromFile )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
262
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
263
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
264 ## Return a list of Path list(s) obtained while splitting a list of connected Path instances according to another based on query coordinates
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
265 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
266 # @param lToKeep: a list of Path instances to keep (reference)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
267 # @param lToUnjoin: a list of Path instances to unjoin
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
268 # @return: list of Path list(s) (can be empty if one of the input lists is empty)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
269 # @warning: all the path instances in a given list MUST be connected (i.e. same identifier)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
270 # @warning: the path instances in a given list MUST NOT overlap with each other, nor with the Path instances of the other list
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
271 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
@staticmethod
def getPathListUnjoinedBasedOnQuery( lToKeep, lToUnjoin ):
    """
    Split the Paths of lToUnjoin into groups separated by the Paths of lToKeep,
    using query coordinates only.

    @param lToKeep: list of Path instances used as separators (reference)
    @param lToUnjoin: list of Path instances to split into groups
    @return: list of lists of Path instances; [] if lToUnjoin is empty,
             [ lToUnjoin ] (the caller's list, unsorted) if lToKeep is empty
    """
    def countPathsEndingBefore( lCandidates, iReference ):
        # number of leading candidates whose query max is strictly below
        # the query min of the reference Path
        nb = 0
        while nb < len(lCandidates) and iReference.range_query.getMin() > lCandidates[nb].range_query.getMax():
            nb += 1
        return nb

    lReferences = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery( lToKeep )
    lRemaining = PathUtils.getPathListSortedByIncreasingMinQueryThenMaxQuery( lToUnjoin )
    if lToUnjoin == []:
        return []
    if lToKeep == []:
        return [ lToUnjoin ]

    lGroups = []
    nbReferences = len(lReferences)
    idx = 0
    while idx < nbReferences:
        # Paths located entirely before the current reference form a group
        nbBefore = countPathsEndingBefore( lRemaining, lReferences[idx] )
        if nbBefore == len(lRemaining):
            # every remaining Path lies before this reference: handled after the loop
            break
        if nbBefore != 0:
            lGroups.append( lRemaining[:nbBefore] )
            del lRemaining[:nbBefore]
        if idx + 1 == nbReferences:
            break
        # Paths located between the current and the next reference form a group
        nbBetween = countPathsEndingBefore( lRemaining, lReferences[idx+1] )
        if nbBetween > 0:
            lGroups.append( lRemaining[:nbBetween] )
            del lRemaining[:nbBetween]
        idx += 1

    # NOTE(review): when no group was emitted and the first reference has
    # already been passed, the remaining Paths are not appended and [] is
    # returned -- confirm this is intended.
    if lGroups != [] or idx == 0:
        lGroups.append( lRemaining )
    return lGroups
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
307
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
308
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
309 ## Return the identity of the Path list, the identity of each instance being weighted by the length of each query range
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
310 # All Paths should have the same query and subject.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
311 # The Paths are merged using query coordinates only.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
312 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
313 # @param lPaths list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
314 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getIdentityFromPathList( lPaths, checkSubjects=True ):
    """
    Return the identity of a list of Paths, each identity being weighted by
    the length of the Path on the query.

    The Paths are first merged with PathUtils.mergePathsInListUsingQueryCoordsOnly;
    the sum of identity * length-on-query over the merged Paths is divided
    by the cumulative length of their merged query sets.

    @param lPaths: list of Path instances
    @param checkSubjects: if True, also require a single distinct subject name
    @return: the weighted identity (float)
    @raise Exception: if the Paths have more than one distinct query name,
           more than one distinct subject name (when checkSubjects is True),
           or if the weighted identity is negative
    @raise RepetDataException: if the weighted identity is above 100
    @note: a cumulative query length of 0 leads to a ZeroDivisionError
    """
    if len( PathUtils.getListOfDistinctQueryNames( lPaths ) ) > 1:
        msg = "ERROR: try to compute identity from Paths with different queries"
        sys.stderr.write( "%s\n" % msg )
        sys.stderr.flush()
        # carry the message in the exception so callers can report it
        raise Exception( msg )
    if checkSubjects and len( PathUtils.getListOfDistinctSubjectNames( lPaths ) ) > 1:
        msg = "ERROR: try to compute identity from Paths with different subjects"
        sys.stderr.write( "%s\n" % msg )
        sys.stderr.flush()
        raise Exception( msg )

    lMergedPaths = PathUtils.mergePathsInListUsingQueryCoordsOnly( lPaths )
    lQuerySets = PathUtils.getSetListFromQueries( lMergedPaths )
    lMergedQuerySets = SetUtils.mergeSetsInList( lQuerySets )
    totalLengthOnQry = SetUtils.getCumulLength( lMergedQuerySets )

    identity = 0
    for iPath in lMergedPaths:
        identity += iPath.identity * iPath.getLengthOnQuery()
    weightedIdentity = identity / float(totalLengthOnQry)

    if weightedIdentity < 0:
        msg = "ERROR: weighted identity '%.2f' outside range" % weightedIdentity
        sys.stderr.write("%s\n" % msg)
        sys.stderr.flush()
        raise Exception( msg )
    elif weightedIdentity > 100:
        msg = "ERROR: weighted identity '%.2f' outside range" % weightedIdentity
        sys.stderr.write("%s\n" % msg)
        sys.stderr.flush()
        raise RepetDataException(msg)
    return weightedIdentity

getIdentityFromPathList = staticmethod( getIdentityFromPathList )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
347
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
348
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
349 ## Return a list of Path instances sorted in increasing order according to the min of the query, then the max of the query, and finally their initial order.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
350 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
351 # @param lPaths list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
352 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
353 def getPathListSortedByIncreasingMinQueryThenMaxQuery(lPaths):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
354 return sorted( lPaths, key=lambda iPath: ( iPath.getQueryMin(), iPath.getQueryMax() ) )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
355
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
356 getPathListSortedByIncreasingMinQueryThenMaxQuery = staticmethod( getPathListSortedByIncreasingMinQueryThenMaxQuery )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
357
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
358
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
359 ## Return a list of Path instances sorted in increasing order according to the min of the query, then the max of the query, then their identifier, and finally their initial order.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
360 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
361 # @param lPaths list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
362 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
363 def getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier(lPaths):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
364 return sorted( lPaths, key=lambda iPath: ( iPath.getQueryMin(), iPath.getQueryMax(), iPath.getIdentifier() ) )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
365
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
366 getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier = staticmethod( getPathListSortedByIncreasingMinQueryThenMaxQueryThenIdentifier )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
367
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
368
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
369 ## Return a list of Path instances sorted in increasing order according to the min of the query, then the max of the query, then the min of the subject, then the max of the subject and finally their initial order.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
370 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
371 # @param lPaths list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
372 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
373 @staticmethod
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
374 def getPathListSortedByIncreasingMinQueryThenMaxQueryThenMinSubjectThenMaxSubject(lPaths):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
375 return sorted(lPaths, key=lambda iPath: (iPath.getQueryMin(), iPath.getQueryMax(), iPath.getSubjectMin(), iPath.getSubjectMax()))
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
376
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
377
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
378 ## Return a list of the distinct identifiers
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
379 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
380 # @param lPaths list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
381 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
382 def getListOfDistinctIdentifiers( lPaths ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
383 sDistinctIdentifiers = set()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
384 for iPath in lPaths:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
385 sDistinctIdentifiers.add(iPath.id)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
386 return list(sDistinctIdentifiers)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
387
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
388 getListOfDistinctIdentifiers = staticmethod( getListOfDistinctIdentifiers )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
389
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
390
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
391 ## Return a list of the distinct query names present in the collection
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
392 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
393 # @param lPaths list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
394 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
395 def getListOfDistinctQueryNames( lPaths ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
396 sDistinctQueryNames = set()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
397 for iPath in lPaths:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
398 sDistinctQueryNames.add(iPath.range_query.seqname)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
399 return list(sDistinctQueryNames)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
400
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
401 getListOfDistinctQueryNames = staticmethod( getListOfDistinctQueryNames )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
402
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
403
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
404 ## Return a list of the distinct subject names present in the collection
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
405 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
406 # @param lPaths list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
407 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
408 def getListOfDistinctSubjectNames( lPaths ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
409 sDistinctSubjectNames = set()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
410 for iPath in lPaths:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
411 sDistinctSubjectNames.add(iPath.range_subject.seqname)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
412 return list(sDistinctSubjectNames)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
413
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
414 getListOfDistinctSubjectNames = staticmethod( getListOfDistinctSubjectNames )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
415
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
416
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
417 ## Return a list of lists containing query coordinates of the connections sorted in increasing order.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
418 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
419 # @param lConnectedPaths: list of Path instances having the same identifier
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
420 # @param minLength: threshold below which connections are not reported (default= 0 bp)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
421 # @note: return only connections longer than threshold
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
422 # @note: if coordinate on query ends at 100, return 101
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
423 # @warning: Path instances MUST be sorted in increasing order according to query coordinates
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
424 # @warning: Path instances MUST be on direct query strand (and maybe on reverse subject strand)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
425 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
426 def getListOfJoinCoordinatesOnQuery(lConnectedPaths, minLength=0):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
427 lJoinCoordinates = []
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
428 for i in xrange(1,len(lConnectedPaths)):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
429 startJoin = lConnectedPaths[i-1].range_query.end
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
430 endJoin = lConnectedPaths[i].range_query.start
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
431 if endJoin - startJoin + 1 > minLength:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
432 lJoinCoordinates.append( [ startJoin + 1, endJoin - 1 ] )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
433 return lJoinCoordinates
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
434
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
435 getListOfJoinCoordinatesOnQuery = staticmethod( getListOfJoinCoordinatesOnQuery )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
436
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
437
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
438 ## Return the length on the query of all Path instances in the given list
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
439 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
440 # @param lPaths list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
441 # @note overlapping ranges are not summed but truncated.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
442 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
@staticmethod
def getLengthOnQueryFromPathList( lPaths ):
    """
    Return the cumulative length on the query of the given Paths.

    The query ranges are converted to sets, the sets are merged, and the
    cumulative length of the merged sets is returned.

    @param lPaths: list of Path instances
    @return: value returned by SetUtils.getCumulLength on the merged query sets
    """
    lMergedQuerySets = SetUtils.mergeSetsInList( PathUtils.getSetListFromQueries( lPaths ) )
    return SetUtils.getCumulLength( lMergedQuerySets )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
450
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
451
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
452 ## Convert a Path file into an Align file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
453 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
454 # @param pathFile: name of the input Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
455 # @param alignFile: name of the output Align file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
456 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def convertPathFileIntoAlignFile(pathFile, alignFile):
    """
    Convert a Path file into an Align file, line by line.

    Each input line is parsed with Path.setFromString, and the Align
    instance obtained from the Path is written to the output file.

    @param pathFile: name of the input Path file
    @param alignFile: name of the output Align file (overwritten)
    """
    # a single Path instance is reused for every line
    iPath = Path()
    # 'with' closes both files even if parsing or writing raises
    with open( pathFile, "r" ) as pathFileHandler:
        with open( alignFile, "w" ) as alignFileHandler:
            for line in pathFileHandler:
                iPath.setFromString( line )
                iAlign = iPath.getAlignInstance()
                iAlign.write( alignFileHandler )

convertPathFileIntoAlignFile = staticmethod( convertPathFileIntoAlignFile )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
472
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
473 #TODO: duplicated method => to rename with the name of the next method (which is called) ?
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
474 ## Convert a Path File into a Map file with query coordinates only
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
475 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
476 # @param pathFile: name of the input Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
477 # @param mapFile: name of the output Map file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
478 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def convertPathFileIntoMapFileWithQueryCoordsOnly( pathFile, mapFile ):
    """
    Convert a Path file into a Map file, line by line.

    Each input line is split on tabulations and loaded into a Path with
    setFromTuple, then written with Path.writeSubjectAsMapOfQuery.

    @param pathFile: name of the input Path file
    @param mapFile: name of the output Map file (overwritten)
    """
    # a single Path instance is reused and reset before each line
    p = Path()
    # 'with' closes both files even if parsing or writing raises
    with open( pathFile, "r" ) as pathFileHandler:
        with open( mapFile, "w" ) as mapFileHandler:
            for line in pathFileHandler:
                p.reset()
                p.setFromTuple( line.split("\t") )
                p.writeSubjectAsMapOfQuery( mapFileHandler )

convertPathFileIntoMapFileWithQueryCoordsOnly = staticmethod( convertPathFileIntoMapFileWithQueryCoordsOnly )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
494
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
495
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
496 ## for each line of a given Path file, write the coordinates of the subject on the query as one line in a Map file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
497 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
498 # @param pathFile: name of the input Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
499 # @param mapFile: name of the output Map file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
500 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
@staticmethod
def convertPathFileIntoMapFileWithSubjectsOnQueries( pathFile, mapFile ):
    """
    Write a Map file from a Path file by delegating to
    PathUtils.convertPathFileIntoMapFileWithQueryCoordsOnly.

    @param pathFile: name of the input Path file
    @param mapFile: name of the output Map file
    """
    # alias: the whole conversion is done by the sibling method
    PathUtils.convertPathFileIntoMapFileWithQueryCoordsOnly( pathFile, mapFile )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
504
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
505
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
506 ## Merge matches on queries
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
507 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
508 # @param inFile: name of the input Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
509 # @param outFile: name of the output Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
510 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def mergeMatchesOnQueries(inFile, outFile):
    """
    Merge matches on queries by running the external program 'mapOp'.

    The input Path file is converted into a temporary Map file
    ('<inFile>.map'), 'mapOp -q <map file> -m' is run through the shell,
    and each line of the resulting '<inFile>.map.merge' file is written to
    the output file with Map.writeAsQueryOfPath. Both temporary files are
    removed on success.

    @param inFile: name of the input Path file
    @param outFile: name of the output Path file (overwritten)
    @note: if mapOp returns a non-zero status, an error is printed on stdout
           and the process exits with status 1 (the Map file is left in place)
    """
    mapFile = "%s.map" % ( inFile )
    PathUtils.convertPathFileIntoMapFileWithQueryCoordsOnly( inFile, mapFile )

    # NOTE(review): the file name is inserted unquoted into a shell command,
    # so names with spaces or shell metacharacters will break it.
    cmd = "mapOp"
    cmd += " -q %s" % ( mapFile )
    cmd += " -m"
    # with this ordering only stdout is discarded; stderr is redirected to
    # the original stdout
    cmd += " 2>&1 > /dev/null"
    exitStatus = os.system( cmd )
    if exitStatus != 0:
        # same text as the former print statement, valid in Python 2 and 3
        sys.stdout.write( "ERROR: mapOp returned %i\n" % ( exitStatus ) )
        sys.exit(1)
    os.remove( mapFile )

    mergeFile = "%s.merge" % ( mapFile )
    # a single Map instance is reused and reset before each line
    m = Map()
    # 'with' closes both files even if parsing or writing raises
    with open( mergeFile, "r" ) as mergeFileHandler:
        with open( outFile, "w" ) as outFileHandler:
            for line in mergeFileHandler:
                m.reset()
                m.setFromString( line, "\t" )
                m.writeAsQueryOfPath( outFileHandler )
    os.remove( mergeFile )

mergeMatchesOnQueries = staticmethod( mergeMatchesOnQueries )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
539
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
540
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
541 ## Filter out chains of Path(s) whose length is below a given threshold
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
542 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
543 # @param lPaths: list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
544 # @param minLengthChain: minimum length of a chain to be kept
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
545 # @note: a chain may contain a single Path instance
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
546 # @return: a list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
547 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
@staticmethod
def filterPathListOnChainLength( lPaths, minLengthChain ):
    """
    Keep only the Paths belonging to chains whose length on the query is at
    least minLengthChain.

    Paths are grouped with PathUtils.getDictOfListsWithIdAsKey and the length
    of each group is computed with PathUtils.getLengthOnQueryFromPathList.

    @param lPaths: list of Path instances
    @param minLengthChain: minimum length of a chain to be kept
    @return: list of the Path instances of the kept chains
    """
    dId2Chain = PathUtils.getDictOfListsWithIdAsKey( lPaths )
    lKeptPaths = []
    for lChain in dId2Chain.values():
        chainLength = PathUtils.getLengthOnQueryFromPathList( lChain )
        if chainLength >= minLengthChain:
            lKeptPaths.extend( lChain )
    return lKeptPaths
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
558
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
559
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
560 ## Return a Path list from a Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
561 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
562 # @param pathFile string name of a Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
563 # @return a list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
564 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getPathListFromFile(pathFile):
    """
    Load each line of a Path file as a Path instance.

    @param pathFile: name of the input Path file
    @return: a list of Path instances, one per line, in file order
    """
    lPaths = []
    # The 'with' block closes the file even when a line cannot be parsed,
    # so the handle is never leaked.
    with open(pathFile, "r") as pathFileHandler:
        for line in pathFileHandler:
            iPath = Path()
            iPath.setFromString(line)
            lPaths.append(iPath)
    return lPaths
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
577
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
578 getPathListFromFile = staticmethod( getPathListFromFile )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
579
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
580
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
581 ## Convert a chain into a 'pathrange'
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
582 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
583 # @param lPaths a list of Path instances with the same identifier
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
584 # @note: the min and max of each Path is used
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
585 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def convertPathListToPathrange(lPaths):
    """
    Collapse a chain of Path instances into a single 'pathrange'.

    An empty list gives None and a one-element list gives back that very
    element (not a copy). Otherwise a deep copy of the first Path is widened
    to the extreme query and subject coordinates found in the chain; its
    e-value is the smallest one, its score is the sum of the scores and its
    identity is the mean of the identities weighted by the length on the
    query.

    The program exits with status 1, after a message on stderr, when two
    Path instances differ by their identifier or by their subject strand.

    @param lPaths: a list of Path instances with the same identifier
    @return: a Path instance, or None for an empty list
    """
    nbPaths = len(lPaths)
    if nbPaths == 0:
        return
    firstPath = lPaths[0]
    if nbPaths == 1:
        return firstPath

    merged = copy.deepcopy(firstPath)
    # Sum of identity * length; divided by the total length at the very end.
    weightedIdentity = firstPath.identity * firstPath.getLengthOnQuery()
    totalLength = merged.getLengthOnQuery()

    for nextPath in lPaths[1:]:
        if nextPath.id != merged.id:
            sys.stderr.write("%s\n" % "ERROR: two Path instances in the chain have different identifiers")
            sys.exit(1)

        onDirectStrand = merged.range_subject.isOnDirectStrand()
        if onDirectStrand != nextPath.range_subject.isOnDirectStrand():
            sys.stderr.write("%s\n" % "ERROR: two Path instances in the chain are on different strands")
            sys.exit(1)

        queryRange = merged.range_query
        queryRange.start = min(queryRange.start, nextPath.range_query.start)
        queryRange.end = max(queryRange.end, nextPath.range_query.end)

        # On the reverse strand start > end, so the extremes are swapped.
        if onDirectStrand:
            pickStart, pickEnd = min, max
        else:
            pickStart, pickEnd = max, min
        subjectRange = merged.range_subject
        subjectRange.start = pickStart(subjectRange.start, nextPath.range_subject.start)
        subjectRange.end = pickEnd(subjectRange.end, nextPath.range_subject.end)

        merged.e_value = min(merged.e_value, nextPath.e_value)
        merged.score += nextPath.score
        weightedIdentity += nextPath.identity * nextPath.getLengthOnQuery()
        totalLength += nextPath.getLengthOnQuery()

    merged.identity = weightedIdentity / float(totalLength)
    return merged
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
617
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
618 convertPathListToPathrange = staticmethod( convertPathListToPathrange )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
619
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
620
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
621 ## Convert a Path file into an Align file via 'pathrange'
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
622 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
623 # @param pathFile: name of the input Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
624 # @param alignFile: name of the output Align file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
625 # @param verbose integer verbosity level
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
626 # @note: the min and max of each Path is used
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
627 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def convertPathFileIntoAlignFileViaPathrange(pathFile, alignFile, verbose=0):
    """
    Convert a Path file into an Align file, one Align line per chain.

    The Path instances are grouped by identifier; each group is collapsed
    with PathUtils.convertPathListToPathrange() and written, in increasing
    identifier order, as the Align instance returned by getAlignInstance().

    @param pathFile: name of the input Path file
    @param alignFile: name of the output Align file
    @param verbose: verbosity level; above 0 the number of chains is
                    printed on stdout
    """
    lPaths = PathUtils.getPathListFromFile(pathFile)
    dId2PathList = PathUtils.getDictOfListsWithIdAsKey(lPaths)
    # sorted() accepts a key view as well as a key list, whereas calling
    # .sort() on the result of keys() requires keys() to return a list.
    lIds = sorted(dId2PathList.keys())
    if verbose > 0:
        msg = "number of chains: %i" % (len(lIds))
        sys.stdout.write("%s\n" % (msg))
        sys.stdout.flush()
    # The 'with' block closes the output file even if a conversion fails.
    with open(alignFile, "w") as alignFileHandler:
        for identifier in lIds:
            iPath = PathUtils.convertPathListToPathrange(dId2PathList[identifier])
            iAlign = iPath.getAlignInstance()
            iAlign.write(alignFileHandler)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
643
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
644 convertPathFileIntoAlignFileViaPathrange = staticmethod( convertPathFileIntoAlignFileViaPathrange )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
645
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
646
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
647 ## Split a list of Path instances according to the name of the query
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
648 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
649 # @param lInPaths list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
650 # @return lOutPathLists list of lists of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
651 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def splitPathListByQueryName(lInPaths):
    """
    Group Path instances by the name of their query.

    The input is first sorted on 'range_query.seqname' (stable sort, so the
    input order is kept inside a group), then cut each time the name changes.

    @param lInPaths: list of Path instances
    @return: list of lists of Path instances, one inner list per query name;
             an empty list when the input is empty
    """
    lOutPathLists = []
    lastQueryName = None
    for iPath in sorted(lInPaths, key=lambda p: p.range_query.seqname):
        queryName = iPath.range_query.seqname
        # Open a new group for the very first Path and on each name change.
        if not lOutPathLists or queryName != lastQueryName:
            lOutPathLists.append([])
            lastQueryName = queryName
        lOutPathLists[-1].append(iPath)
    return lOutPathLists
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
669
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
670 splitPathListByQueryName = staticmethod( splitPathListByQueryName )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
671
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
672
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
673 ## Create a Path file from each list of Path instances in the input list
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
674 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
675 # @param lPathList list of lists with Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
676 # @param pattern string
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
677 # @param dirName string
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
678 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def createPathFiles(lPathList, pattern, dirName=""):
    """
    Write each list of Path instances into its own '.path' file.

    Files are named '<dirName><pattern>_<rank>.path', where the rank starts
    at 1 and is zero-padded to the number of digits of the list count.
    When 'dirName' is not empty it is created with os.mkdir(), which raises
    if the directory cannot be created.

    @param lPathList: list of lists with Path instances
    @param pattern: prefix of the file names
    @param dirName: output directory (a trailing '/' is added if missing)
    """
    rankWidth = len(str(len(lPathList)))
    if dirName != "":
        if not dirName.endswith("/"):
            dirName += '/'
        os.mkdir(dirName)

    for index, lPath in enumerate(lPathList):
        suffix = "_%s.path" % (str(index + 1).zfill(rankWidth))
        PathUtils.writeListInFile(lPath, dirName + pattern + suffix)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
691
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
692 createPathFiles = staticmethod( createPathFiles )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
693
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
694
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
695 ## Return a list of Path instances sorted in increasing order according to the min, then the inverse of the query length, and finally their initial order
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
696 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
697 # @param lPaths: list of Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
698 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getPathListSortedByIncreasingQueryMinThenInvQueryLength(lPaths):
    """
    Return a new list sorted on the query min, then on the inverse of the
    length on the query (so the longest comes first for a given min).

    The sort is stable: Path instances with equal keys keep their input order.

    @param lPaths: list of Path instances
    @return: a new sorted list; the input list is left untouched
    """
    def sortKey(iPath):
        queryMin = iPath.getQueryMin()
        inverseLength = 1 / float(iPath.getLengthOnQuery())
        return (queryMin, inverseLength)

    return sorted(lPaths, key=sortKey)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
701
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
702 getPathListSortedByIncreasingQueryMinThenInvQueryLength = staticmethod( getPathListSortedByIncreasingQueryMinThenInvQueryLength )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
703
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
704
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
705 ## Merge all overlapping Path instances in a list without considering the identifiers
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
706 # Start by sorting the Path instances by their increasing min coordinate
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
707 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
708 # @return: a new list with the merged Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
709 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def mergePathsInList(lPaths):
    """
    Merge the overlapping Path instances of a list, whatever their identifier.

    The Path instances are first sorted with
    PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength().
    Each one absorbs, through merge(), every later Path it overlaps, and is
    then either merged into the already-kept Path instances it overlaps or
    appended to the result.

    NOTE(review): merge() is called on the input objects themselves, so the
    Path instances given by the caller appear to be modified - confirm
    against Path.merge().

    @param lPaths: list of Path instances
    @return: a new list with the merged Path instances
    """
    lMergedPaths = []
    if len(lPaths) == 0:
        return lMergedPaths

    lSortedPaths = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength(lPaths)

    for rank, iPath in enumerate(lSortedPaths):
        # Absorb every later Path overlapping the current one.
        for laterPath in lSortedPaths[rank + 1:]:
            if iPath.isOverlapping(laterPath):
                iPath.merge(laterPath)

        # Fold the current Path into the kept ones it overlaps, if any.
        isAlreadyInList = False
        for keptPath in lMergedPaths:
            if keptPath.isOverlapping(iPath):
                isAlreadyInList = True
                keptPath.merge(iPath)
                lMergedPaths[lMergedPaths.index(keptPath)] = keptPath
        if not isAlreadyInList:
            lMergedPaths.append(iPath)
    return lMergedPaths
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
733
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
734 mergePathsInList = staticmethod( mergePathsInList )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
735
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
736
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
737 ## Merge all overlapping Path instances in a list without considering if subjects are overlapping.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
738 # Start by sorting the Path instances by their increasing min coordinate.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
739 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
740 # @return: a new list with the merged Path instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
741 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def mergePathsInListUsingQueryCoordsOnly(lPaths):
    """
    Merge the Path instances of a list that overlap on the query, without
    checking whether their subjects overlap.

    The Path instances are first sorted with
    PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength().
    Each one absorbs, through merge(), every later Path whose query overlaps
    its own, and is then either merged into the already-kept Path instances
    it overlaps on the query or appended to the result.

    NOTE(review): merge() is called on the input objects themselves, so the
    Path instances given by the caller appear to be modified - confirm
    against Path.merge().

    @param lPaths: list of Path instances
    @return: a new list with the merged Path instances
    """
    lMergedPaths = []
    if len(lPaths) == 0:
        return lMergedPaths

    lSortedPaths = PathUtils.getPathListSortedByIncreasingQueryMinThenInvQueryLength(lPaths)

    for rank, iPath in enumerate(lSortedPaths):
        # Absorb every later Path overlapping the current one on the query.
        for laterPath in lSortedPaths[rank + 1:]:
            if iPath.isQueryOverlapping(laterPath):
                iPath.merge(laterPath)

        # Fold the current Path into the kept ones it overlaps, if any.
        isAlreadyInList = False
        for keptPath in lMergedPaths:
            if keptPath.isQueryOverlapping(iPath):
                isAlreadyInList = True
                keptPath.merge(iPath)
                lMergedPaths[lMergedPaths.index(keptPath)] = keptPath
        if not isAlreadyInList:
            lMergedPaths.append(iPath)
    return lMergedPaths
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
765
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
766 mergePathsInListUsingQueryCoordsOnly = staticmethod( mergePathsInListUsingQueryCoordsOnly )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
767
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
768
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
769 ## Convert a Path file into a GFF file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
770 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
771 # @param pathFile: name of the input Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
772 # @param gffFile: name of the output GFF file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
773 # @param source: source to write in the GFF file (column 2)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
774 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
775 # @note the 'path' query is supposed to correspond to the 'gff' first column
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
776 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def convertPathFileIntoGffFile(pathFile, gffFile, source="REPET", verbose=0):
    """
    Convert a Path file into a GFF file.

    A chain made of a single Path gives one GFF line whose ID is the Path
    identifier. A chain made of several Path instances gives one line for
    the whole 'pathrange' (ID 'ms<id>') followed by one 'match_part' line
    per Path (ID 'mp<id>-<rank>', Parent 'ms<id>').

    @param pathFile: name of the input Path file
    @param gffFile: name of the output GFF file
    @param source: source to write in the GFF file (column 2)
    @param verbose: verbosity level; above 0 the number of chains is
                    printed on stdout
    @note the 'path' query is supposed to correspond to the 'gff' first column
    """
    dId2PathList = PathUtils.getDictOfListsWithIdAsKeyFromFile(pathFile)
    if verbose > 0:
        msg = "number of chains: %i" % (len(dId2PathList.keys()))
        sys.stdout.write("%s\n" % msg)
        sys.stdout.flush()
    # The 'with' block closes the output file even if a conversion fails.
    with open(gffFile, "w") as gffFileHandler:
        # 'pathId' and 'gffLine' avoid shadowing the builtin 'id' and the
        # name 'string'.
        for pathId in dId2PathList.keys():
            lChain = dId2PathList[pathId]
            if len(lChain) == 1:
                iPath = lChain[0]
                gffLine = iPath.toStringAsGff(ID="%i" % iPath.getIdentifier(),
                                              source=source)
                gffFileHandler.write("%s\n" % gffLine)
            else:
                iPathrange = PathUtils.convertPathListToPathrange(lChain)
                parentId = "ms%i" % iPathrange.getIdentifier()
                gffLine = iPathrange.toStringAsGff(ID=parentId,
                                                   source=source)
                gffFileHandler.write("%s\n" % gffLine)
                count = 0
                for iPath in lChain:
                    count += 1
                    gffLine = iPath.toStringAsGff(type="match_part",
                                                  ID="mp%i-%i" % (iPath.getIdentifier(), count),
                                                  Parent=parentId,
                                                  source=source)
                    gffFileHandler.write("%s\n" % gffLine)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
804
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
805 convertPathFileIntoGffFile = staticmethod( convertPathFileIntoGffFile )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
806
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
807
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
808 ## Convert a Path file into a Set file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
809 # replace old parser.pathrange2set
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
810 # @param pathFile: name of the input Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
811 # @param setFile: name of the output Set file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
812 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def convertPathFileIntoSetFile(pathFile, setFile):
    """
    Convert a Path file into a Set file, line by line.

    Each input line is parsed into a Path and written as the Set instance
    returned by getSubjectAsSetOfQuery().

    @param pathFile: name of the input Path file
    @param setFile: name of the output Set file
    """
    # Both files are closed by the 'with' blocks, even if a line cannot be
    # parsed or written.
    with open(pathFile, "r") as pathFileHandler:
        with open(setFile, "w") as setFileHandler:
            # A single Path instance is reused for every line.
            iPath = Path()
            for line in pathFileHandler:
                iPath.setFromString(line)
                iSet = iPath.getSubjectAsSetOfQuery()
                iSet.write(setFileHandler)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
826
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
827 convertPathFileIntoSetFile = staticmethod( convertPathFileIntoSetFile )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
828
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
829 ## Write Path File without duplicated Path (same query, same subject and same coordinate)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
830 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
831 # @param inputFile: name of the input Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
832 # @param outputFile: name of the output Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
833 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName(inputFile, outputFile):
    """
    Write a Path file without the duplicated Path instances.

    Consecutive lines sharing the same query name and subject name form a
    group; each group is filtered with
    PathUtils.getPathListWithoutDuplicatesOnQueryCoord() and appended to the
    output file.

    NOTE(review): the output file is opened in append mode ("a") by
    PathUtils.writeListInFile(), so a pre-existing output file presumably
    keeps its previous content - confirm against the callers.

    @param inputFile: name of the input Path file
    @param outputFile: name of the output Path file
    """
    previousQuery = ""
    previousSubject = ""
    lPaths = []
    # The 'with' block closes the input file even if a line cannot be parsed
    # or a group cannot be written.
    with open(inputFile, "r") as f:
        for line in f:
            iPath = Path()
            iPath.setFromString(line)
            query = iPath.getQueryName()
            subject = iPath.getSubjectName()
            # A change of query or subject closes the current group.
            if (query != previousQuery or subject != previousSubject) and lPaths != []:
                lPathsWithoutDuplicate = PathUtils.getPathListWithoutDuplicatesOnQueryCoord(lPaths)
                PathUtils.writeListInFile(lPathsWithoutDuplicate, outputFile, "a")
                lPaths = []
            lPaths.append(iPath)
            previousQuery = query
            previousSubject = subject
        # Flush the last group (also reached, with an empty list, when the
        # input file is empty).
        lPathsWithoutDuplicate = PathUtils.getPathListWithoutDuplicatesOnQueryCoord(lPaths)
        PathUtils.writeListInFile(lPathsWithoutDuplicate, outputFile, "a")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
856 removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName = staticmethod(removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
857
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
858