comparison PsiCLASS-1.0.2/Constraints.hpp @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:903fc43d6227
1 #ifndef _MOURISL_CLASSES_CONSTRAINTS_HEADER
2 #define _MOURISL_CLASSES_CONSTRAINTS_HEADER
3
4 #include <vector>
5 #include <map>
6 #include <algorithm>
7 #include <string>
8 #include <string.h>
9
10 #include "BitTable.hpp"
11 #include "alignments.hpp"
12 #include "SubexonGraph.hpp"
13
14 struct _constraint
15 {
16 BitTable vector ; // subexon vector
17 double weight ;
18 double normAbund ;
19 double abundance ;
20 int support ;
21 int uniqSupport ;
22 int maxReadLen ; // the longest read length support this constraint.
23
24 int info ; // other usages.
25 int first, last ; // indicate the first and last index of the subexons.
26 } ;
27
28 struct _matePairConstraint
29 {
30 int i, j ;
31
32 int support ;
33 int uniqSupport ;
34
35 double abundance ;
36 double normAbund ;
37 int effectiveCount ;
38 int type ;
39 } ;
40
41 struct _readIdHeap
42 {
43 char *readId ;
44 int pos ;
45 int matePos ;
46 int idx ;
47 } ;
48
49 //----------------------------------------------------------------------------------
50 // We assume the access to the data structure is sorted by matePos.
51 // So we can "pre-cache" the ids with the same matePos.
52 class MateReadIds
53 {
54 private:
55 std::vector< struct _readIdHeap > heap ;
56
57 void HeapifyUp( int tag )
58 {
59 while ( tag > 1 )
60 {
61 if ( heap[tag / 2].matePos < heap[tag].matePos )
62 return ;
63 struct _readIdHeap tmp ;
64 tmp = heap[tag / 2] ;
65 heap[tag / 2] = heap[tag] ;
66 heap[tag] = tmp ;
67
68 tag /= 2 ;
69 }
70 }
71
72 void HeapifyDown( int tag )
73 {
74 int size = heap.size() ;
75 while ( 2 * tag < size )
76 {
77 int choose = 2 * tag ;
78 if ( 2 * tag + 1 < size &&
79 heap[ 2 * tag + 1].matePos < heap[2 * tag ].matePos )
80 {
81 choose = 2 * tag + 1 ;
82 }
83
84 if ( heap[tag].matePos < heap[choose].matePos )
85 return ;
86
87 struct _readIdHeap tmp ;
88 tmp = heap[choose] ;
89 heap[choose] = heap[tag] ;
90 heap[tag] = tmp ;
91
92 tag = choose ;
93 }
94 }
95
96 struct _readIdHeap Pop()
97 {
98 struct _readIdHeap ret ;
99 int size = heap.size() ;
100 if ( size < 2 )
101 {
102 ret.readId = NULL ;
103 return ret ;
104 }
105
106 ret = heap[1] ;
107
108 heap[1] = heap[ heap.size() - 1] ;
109 heap.pop_back() ;
110 HeapifyDown( 1 ) ;
111
112 return ret ;
113 }
114
115 int cachedMatePos ;
116 std::map<std::string, int> cachedIdx ;
117 bool hasMateReadIdSuffix ; // ignore the last ".{1,2}" or "/{1,2}" .
118 public:
119 MateReadIds()
120 {
121 // Push a dummy element so the vector becomes 1-based.
122 struct _readIdHeap nh ;
123 nh.readId = NULL ;
124 nh.pos = nh.idx = nh.matePos = -1 ;
125 heap.push_back( nh ) ;
126 cachedMatePos = -1 ;
127 hasMateReadIdSuffix = false ;
128 }
129 ~MateReadIds()
130 {
131 int size = heap.size() ;
132 std::map<std::string, int>().swap( cachedIdx ) ;
133 for ( int i = 0 ; i < size ; ++i )
134 if ( heap[i].readId != NULL )
135 free( heap[i].readId ) ;
136 }
137
138 void Clear()
139 {
140 std::map<std::string, int>().swap( cachedIdx ) ;
141
142 int size = heap.size() ;
143 for ( int i = 0 ; i < size ; ++i )
144 if ( heap[i].readId != NULL )
145 free( heap[i].readId ) ;
146 std::vector<struct _readIdHeap>().swap( heap ) ;
147
148 struct _readIdHeap nh ;
149 nh.readId = NULL ;
150 nh.pos = nh.idx = nh.matePos = -1 ;
151 heap.push_back( nh ) ;
152 cachedMatePos = -1 ;
153 }
154
155 void Insert( char *id, int pos, int idx, int matePos )
156 {
157 struct _readIdHeap nh ;
158 nh.readId = strdup( id ) ;
159 nh.pos = pos ;
160 nh.idx = idx ;
161 nh.matePos = matePos ;
162
163 heap.push_back( nh ) ;
164 HeapifyUp( heap.size() - 1 ) ;
165 }
166
167 // If the id does not exist, return -1.
168 int Query( char *id, int matePos )
169 {
170 int size ;
171 size = heap.size() ;
172 if ( matePos > cachedMatePos )
173 {
174 std::map<std::string, int>().swap( cachedIdx ) ;
175
176 while ( size >= 2 && heap[1].matePos < matePos )
177 {
178 struct _readIdHeap r = Pop() ;
179 if ( r.readId )
180 {
181 free( r.readId ) ;
182 }
183 --size ;
184 }
185
186 while ( size >= 2 && heap[1].matePos == matePos )
187 {
188 struct _readIdHeap r = Pop() ;
189 cachedIdx[ std::string( r.readId ) ] = r.idx ;
190 if ( r.readId )
191 free( r.readId ) ;
192 --size ;
193 }
194 cachedMatePos = matePos ;
195 }
196 std::string s( id ) ;
197 if ( hasMateReadIdSuffix )
198 {
199 int len = s.length() ;
200 if ( len >= 2 && ( s[len - 1] == '1' || s[len - 1] == '2' )
201 && ( s[len - 2] == '.' || s[len - 2] == '/' ) )
202 {
203 s[len - 1] = '2' - s[len - 1] + '1' ;
204 }
205 }
206
207 if ( cachedIdx.find( s ) != cachedIdx.end() )
208 {
209 return cachedIdx[s] ;
210 }
211 return -1 ;
212 }
213
214 void UpdateIdx( std::vector<int> &newIdx )
215 {
216 int size = heap.size() ;
217 int i ;
218 for ( i = 1 ; i < size ; ++i )
219 {
220 heap[i].idx = newIdx[ heap[i].idx ] ;
221 }
222
223 for ( std::map<std::string, int>::iterator it = cachedIdx.begin() ; it != cachedIdx.end() ; ++it )
224 it->second = newIdx[ it->second ] ;
225 }
226
227 void SetHasMateReadIdSuffix( bool in )
228 {
229 hasMateReadIdSuffix = true ;
230 }
231 } ;
232
233
234 //--------------------------------------------------------------------------
235 class Constraints
236 {
237 private:
238 int prevStart, prevEnd ;
239 bool usePrimaryAsUnique ;
240 MateReadIds mateReadIds ;
241
242 Alignments *pAlignments ;
243
244 //@return: whether this alignment is compatible with the subexons or not.
245 bool ConvertAlignmentToBitTable( struct _pair *segments, int segCnt, struct _subexon *subexons, int seCnt, int seStart, struct _constraint &ct ) ;
246
247 // Sort to increasing order. Since the first subexon occupies the least important digit.
248 static bool CompSortConstraints( const struct _constraint &a, const struct _constraint &b )
249 {
250 //int k
251 if ( a.first < b.first )
252 return true ;
253 else if ( a.first > b.first )
254 return false ;
255
256 int diffPos = a.vector.GetFirstDifference( b.vector ) ;
257 if ( diffPos == -1 ) // case of equal.
258 return false ;
259
260 if ( a.vector.Test( diffPos ))
261 return false ;
262 else
263 return true ;
264 }
265
266 static bool CompSortMatePairs( const struct _matePairConstraint &a, const struct _matePairConstraint &b )
267 {
268 if ( a.i < b.i )
269 return true ;
270 else if ( a.i > b.i )
271 return false ;
272 else
273 {
274 if ( a.j < b.j )
275 return true ;
276 else
277 return false ;
278 }
279 }
280
281 void CoalesceSameConstraints() ;
282 void ComputeNormAbund( struct _subexon *subexons ) ;
283 public:
284 std::vector<struct _constraint> constraints ;
285 std::vector<struct _matePairConstraint> matePairs ;
286
287 Constraints()
288 {
289 usePrimaryAsUnique = false ;
290 }
291
292 Constraints( Alignments *a ): pAlignments( a )
293 {
294 }
295
296 ~Constraints()
297 {
298 int i ;
299 int size = constraints.size() ;
300 for ( i = 0 ; i < size ; ++i )
301 constraints[i].vector.Release() ;
302 constraints.clear() ;
303 std::vector<struct _constraint>().swap( constraints ) ;
304 matePairs.clear() ;
305 std::vector<struct _matePairConstraint>().swap( matePairs ) ;
306 }
307
308 void Clear()
309 {
310 //TODO: do I need to release the memory from BitTable?
311 constraints.clear() ;
312 }
313
314 void SetAlignments( Alignments *a )
315 {
316 pAlignments = a ;
317 }
318
319 void SetUsePrimaryAsUnique( bool in )
320 {
321 usePrimaryAsUnique = in ;
322 }
323
324 void Assign( Constraints &c )
325 {
326 int i ;
327 int size = constraints.size() ;
328 if ( size > 0 )
329 {
330 for ( i = 0 ; i < size ; ++i )
331 constraints[i].vector.Release() ;
332 constraints.clear() ;
333 std::vector<struct _constraint>().swap( constraints ) ;
334 }
335 matePairs.clear() ;
336 std::vector<struct _matePairConstraint>().swap( matePairs ) ;
337
338 //constraints.resize( c.constraints.size() ) ;
339 constraints = c.constraints ;
340 size = c.constraints.size() ;
341 for ( i = 0 ; i < size ; ++i )
342 {
343 /*struct _constraint nc ;
344 nc.weight = c.constraints[i].weight ;
345 nc.normAbund = c.constraints[i].normAbund ;
346 nc.abundance = c.constraints[i].abundance ;
347 nc.support = c.constraints[i].support ;
348 nc.uniqSupport = c.constraints[i].uniqSupport ;
349 nc.maxReadLen = c.constraints[i].maxReadLen ;
350 nc.info = c.constraints[i].info ;
351 nc.first = c.constraints[i].first ;
352 nc.last = c.constraints[i].last ;
353 nc.vector.Duplicate( c.constraints[i].vector ) ;
354 constraints[i] = ( nc ) ; */
355 constraints[i].vector.Nullify() ; // so that it won't affect the BitTable in "c"
356 constraints[i].vector.Duplicate( c.constraints[i].vector ) ;
357 }
358 matePairs = c.matePairs ;
359 pAlignments = c.pAlignments ;
360 }
361
362 void DownsampleConstraintsFrom( Constraints &c, int stride = 10 )
363 {
364 int i ;
365 int size = constraints.size(), k ;
366
367 if ( size > 0 )
368 {
369 for ( i = 0 ; i < size ; ++i )
370 constraints[i].vector.Release() ;
371 constraints.clear() ;
372 std::vector<struct _constraint>().swap( constraints ) ;
373 }
374 matePairs.clear() ;
375 std::vector<struct _matePairConstraint>().swap( matePairs ) ;
376
377 //constraints.resize( c.constraints.size() ) ;
378 //constraints = c.constraints ;
379 k = 0 ;
380 size = c.constraints.size() ;
381 for ( i = 0 ; i < size ; i += stride, ++k )
382 {
383 constraints.push_back( c.constraints[i] ) ;
384 constraints[k].vector.Nullify() ; // so that it won't affect the BitTable in "c"
385 constraints[k].vector.Duplicate( c.constraints[i].vector ) ;
386
387 /*std::vector<int> seIdx ;
388 constraints[k].vector.GetOnesIndices( seIdx ) ;
389 int j, l = seIdx.size() ;
390 for ( j = 2 ; j < l ; ++j )
391 {
392 constraints[k].vector.Unset( seIdx[j] ) ;
393 }
394 constraints[k].last = seIdx[1] ;*/
395 }
396 // mate pairs is not used. if we down-sampling
397 pAlignments = c.pAlignments ;
398 }
399
400 void TruncateConstraintsCoverFrom( Constraints &c, int seCnt, int maxConstraintSize )
401 {
402 int i ;
403 int size = constraints.size() ;
404
405 if ( size > 0 )
406 {
407 for ( i = 0 ; i < size ; ++i )
408 constraints[i].vector.Release() ;
409 constraints.clear() ;
410 std::vector<struct _constraint>().swap( constraints ) ;
411 }
412 matePairs.clear() ;
413 std::vector<struct _matePairConstraint>().swap( matePairs ) ;
414
415 //constraints.resize( c.constraints.size() ) ;
416 //constraints = c.constraints ;
417 size = c.constraints.size() ;
418 for ( i = 0 ; i < size ; ++i )
419 {
420 constraints.push_back( c.constraints[i] ) ;
421 constraints[i].vector.Nullify() ; // so that it won't affect the BitTable in "c"
422 constraints[i].vector.Init( seCnt ) ;
423 std::vector<int> seIdx ;
424 c.constraints[i].vector.GetOnesIndices( seIdx ) ;
425 int j, l = seIdx.size() ;
426 for ( j = 0 ; j < maxConstraintSize && j < l ; ++j )
427 {
428 constraints[i].vector.Set( seIdx[j] ) ;
429 }
430 constraints[i].last = seIdx[j - 1] ;
431 }
432 // mate pairs is not used. if we down-sampling
433 pAlignments = c.pAlignments ;
434 }
435
436 void SetHasMateReadIdSuffix( bool in )
437 {
438 mateReadIds.SetHasMateReadIdSuffix( in ) ;
439 }
440
441 int BuildConstraints( struct _subexon *subexons, int seCnt, int start, int end ) ;
442
443 } ;
444
445 #endif