annotate PsiCLASS-1.0.2/Constraints.cpp @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 #include "Constraints.hpp"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 // return whether this constraint is compatible with the subexons.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 bool Constraints::ConvertAlignmentToBitTable( struct _pair *segments, int segCnt,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 struct _subexon *subexons, int seCnt, int seStart, struct _constraint &ct )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 k = seStart ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 ct.vector.Init( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 // Each segment of an alignment can cover several subexons.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 // But the first and last segment can partially cover a subexon.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 for ( i = 0 ; i < segCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 int leftIdx, rightIdx ; // the range of subexons covered by this segment.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 leftIdx = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 rightIdx = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 for ( ; k < seCnt ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 //if ( segments[0].b == 110282529 && segCnt == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 // printf( "(%d:%d %d):(%d:%d %d)\n", i, (int)segments[i].a, (int)segments[i].b, k, (int)subexons[k].start, (int)subexons[k].end ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 if ( subexons[k].start > segments[i].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 if ( segments[i].a > subexons[k].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 int relaxedWidth = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 if ( ( subexons[k].start >= segments[i].a && subexons[k].end <= segments[i].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 || ( i == 0 && subexons[k].start - relaxedWidth < segments[i].a && subexons[k].end <= segments[i].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 || ( i == segCnt - 1 && subexons[k].start >= segments[i].a && subexons[k].end + relaxedWidth > segments[i].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 || ( i == 0 && i == segCnt - 1 && subexons[k].start - relaxedWidth < segments[i].a && subexons[k].end + relaxedWidth > segments[i].b ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 if ( leftIdx == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 leftIdx = k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 rightIdx = k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 ct.vector.Set( k ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 if ( leftIdx == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 // The cover contradict the boundary.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 if ( !( ( subexons[leftIdx].leftType == 0 || subexons[leftIdx].start <= segments[i].a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 && ( subexons[rightIdx].rightType == 0 || subexons[rightIdx].end >= segments[i].b ) ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 // The intron must exists in the subexon graph.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 if ( i > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 for ( j = 0 ; j < subexons[ ct.last ].nextCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 if ( subexons[ct.last].next[j] == leftIdx )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 if ( j >= subexons[ ct.last ].nextCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 // The subexons must be consecutive
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 for ( j = leftIdx + 1 ; j <= rightIdx ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 if ( subexons[j].start > subexons[j - 1].end + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 if ( i == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 ct.first = leftIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 ct.last = rightIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 void Constraints::CoalesceSameConstraints()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 int i, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 int size = constraints.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 constraints[i].info = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 //printf( "constraints %d: %d %d %d\n", i, constraints[i].vector.Test( 0 ), constraints[i].vector.Test(1), constraints[i].support ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 std::vector<int> newIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 newIdx.resize( size, 0 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 // Update the constraints.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 if ( size > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 std::sort( constraints.begin(), constraints.end(), CompSortConstraints ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 newIdx[ constraints[0].info ] = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 for ( i = 1 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 if ( constraints[k].vector.IsEqual( constraints[i].vector ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 constraints[k].weight += constraints[i].weight ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 constraints[k].support += constraints[i].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 constraints[k].uniqSupport += constraints[i].uniqSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 constraints[k].maxReadLen = ( constraints[k].maxReadLen > constraints[i].maxReadLen ) ?
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 constraints[k].maxReadLen : constraints[i].maxReadLen ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 constraints[i].vector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 if ( k != i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 constraints[k] = constraints[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 newIdx[ constraints[i].info ] = k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 constraints.resize( k + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 // Update the mate pairs.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 size = matePairs.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 if ( size > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 //printf( "%d %d: %d => %d | %d =>%d\n", i, newIdx.size(), matePairs[i].i, newIdx[ matePairs[i].i ],
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 // matePairs[i].j, newIdx[ matePairs[i].j ] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 matePairs[i].i = newIdx[ matePairs[i].i ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 matePairs[i].j = newIdx[ matePairs[i].j ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128 std::sort( matePairs.begin(), matePairs.end(), CompSortMatePairs ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 for ( i = 1 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 if ( matePairs[i].i == matePairs[k].i && matePairs[i].j == matePairs[k].j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 matePairs[k].support += matePairs[i].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136 matePairs[k].uniqSupport += matePairs[i].uniqSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 matePairs[k] = matePairs[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 //printf( "%s: %d\n", __func__, matePairs[1].i) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 matePairs.resize( k + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 // Update the data structure for future mate pairs.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 mateReadIds.UpdateIdx( newIdx ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 void Constraints::ComputeNormAbund( struct _subexon *subexons )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 int i, j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155 int ctSize = constraints.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156 for ( i = 0 ; i < ctSize ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158 // spanned more than 2 subexon
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 int readLen = constraints[i].maxReadLen ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160 if ( constraints[i].first + 1 < constraints[i].last )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162 std::vector<int> subexonInd ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 constraints[i].vector.GetOnesIndices( subexonInd ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 int size = subexonInd.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 for ( j = 1 ; j < size - 1 ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 int a = subexonInd[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 readLen -= ( subexons[a].end - subexons[a].start + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 int effectiveLength ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 if ( constraints[i].first == constraints[i].last )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 effectiveLength = ( subexons[ constraints[i].first ].end - readLen + 1 )- subexons[ constraints[i].first ].start + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 if ( effectiveLength <= 0 ) // this happens in the 3',5'-end subexon, where we trimmed the length
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177 effectiveLength = ( subexons[ constraints[i].first ].end - subexons[ constraints[i].first ].start + 1 ) / 2 + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 int a = constraints[i].first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 int b = constraints[i].last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 int start, end ; // the range of the possible start sites of a read in subexons[a].
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 start = subexons[a].end + 1 - ( readLen - 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 if ( start < subexons[a].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 start = subexons[a].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188 if ( subexons[b].end - subexons[b].start + 1 >= readLen - 1 || subexons[b].rightType == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189 end = subexons[a].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 end = subexons[a].end + 1 - ( readLen - ( subexons[b].end - subexons[b].start + 1 ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195 if ( end < start ) // when we trimmed the subexon.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196 end = subexons[a].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198 effectiveLength = end - start + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200 //printf( "%d: effectiveLength=%d support=%d\n", i, effectiveLength, constraints[i].support ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201 constraints[i].normAbund = (double)constraints[i].weight / (double)effectiveLength ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203 if ( ( subexons[ constraints[i].first ].leftType == 0 && subexons[ constraints[i].first ].end - subexons[ constraints[i].first ].start + 1 >= 8 * pAlignments->readLen )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204 || ( subexons[ constraints[i].last ].rightType == 0 && subexons[ constraints[i].last ].end - subexons[ constraints[i].last ].start + 1 >= 8 * pAlignments->readLen ) ) // some random elongation of the sequence might make unnecessary long effective length.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206 constraints[i].normAbund *= 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
208 constraints[i].abundance = constraints[i].normAbund ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
209 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
210
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
211 ctSize = matePairs.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
212 for ( i = 0 ; i < ctSize ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
213 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
214 double a = constraints[ matePairs[i].i ].normAbund ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
215 double b = constraints[ matePairs[i].j ].normAbund ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
216
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
217 matePairs[i].normAbund = a < b ? a : b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
218
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
219 if ( matePairs[i].i != matePairs[i].j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
220 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
221 if ( subexons[ constraints[ matePairs[i].i ].first ].leftType == 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
222 && constraints[ matePairs[i].i ].first == constraints[ matePairs[i].i ].last
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
223 && a < b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
224 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
225 matePairs[i].normAbund = b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
226 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
227 else if ( subexons[ constraints[ matePairs[i].j ].last ].rightType == 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
228 && constraints[ matePairs[i].j ].first == constraints[ matePairs[i].j ].last
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
229 && a > b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
230 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
231 matePairs[i].normAbund = a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
232 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
233 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
234 //matePairs[i].normAbund = sqrt( a * b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
235
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
236 matePairs[i].abundance = matePairs[i].normAbund ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
237 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
238 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
239
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
240 int Constraints::BuildConstraints( struct _subexon *subexons, int seCnt, int start, int end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
241 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
242 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
243 int tag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
244 int coalesceThreshold = 16384 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
245 Alignments &alignments = *pAlignments ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
246 // Release the memory from previous gene.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
247 int size = constraints.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
248
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
249 if ( size > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
250 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
251 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
252 constraints[i].vector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
253 std::vector<struct _constraint>().swap( constraints ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
254 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
255 std::vector<struct _matePairConstraint>().swap( matePairs ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
256 mateReadIds.Clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
257
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
258 // Start to build the constraints.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
259 bool callNext = false ; // the last used alignment
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
260 if ( alignments.IsAtBegin() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
261 callNext = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
262 while ( !alignments.IsAtEnd() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
263 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
264 if ( callNext )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
265 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
266 if ( !alignments.Next() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
267 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
268 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
269 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
270 callNext = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
271
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
272 if ( alignments.GetChromId() < subexons[0].chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
273 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
274 else if ( alignments.GetChromId() > subexons[0].chrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
275 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
276 // locate the first subexon in this region that overlapps with current alignment.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
277 for ( ; tag < seCnt && subexons[tag].end < alignments.segments[0].a ; ++tag )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
278 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
279
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
280 if ( tag >= seCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
281 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
282 if ( alignments.segments[ alignments.segCnt - 1 ].b < subexons[tag].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
283 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
284
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
285 int uniqSupport = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
286 if ( usePrimaryAsUnique )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
287 uniqSupport = alignments.IsPrimary() ? 1 : 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
288 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
289 uniqSupport = alignments.IsUnique() ? 1 : 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
290
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
291 struct _constraint ct ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
292 ct.vector.Init( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
293 //printf( "%s %d: %lld-%lld | %d-%d\n", __func__, alignments.segCnt, alignments.segments[0].a, alignments.segments[0].b, subexons[tag].start, subexons[tag].end ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
294 ct.weight = 1.0 / alignments.GetNumberOfHits() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
295 if ( alignments.IsGCRich() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
296 ct.weight *= 10 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
297 ct.normAbund = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
298 ct.support = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
299 ct.uniqSupport = uniqSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
300 ct.maxReadLen = alignments.GetRefCoverLength() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
301
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
302 if ( alignments.IsPrimary() && ConvertAlignmentToBitTable( alignments.segments, alignments.segCnt,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
303 subexons, seCnt, tag, ct ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
304 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
305
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
306 //printf( "%s ", alignments.GetReadId() ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
307 //ct.vector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
308
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
309
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
310 // If the alignment has clipped end or tail. We only keep those clipped in the 3'/5'-end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
311 bool validClip = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
312 if ( alignments.HasClipHead() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
313 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
314 if ( ( ct.first < seCnt - 1 && subexons[ct.first].end + 1 == subexons[ct.first + 1].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
315 || subexons[ct.first].prevCnt > 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
316 || alignments.segments[0].b - alignments.segments[0].a + 1 <= alignments.GetRefCoverLength() / 3.0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
317 validClip = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
318 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
319 if ( alignments.HasClipTail() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
320 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
321 int tmp = alignments.segCnt - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
322 if ( ( ct.last > 0 && subexons[ct.last].start - 1 == subexons[ct.last - 1].end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
323 || subexons[ct.last].nextCnt > 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
324 || alignments.segments[tmp].b - alignments.segments[tmp].a + 1 <= alignments.GetRefCoverLength() / 3.0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
325 validClip = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
326 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
327
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
328 if ( validClip )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
329 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
330 constraints.push_back( ct ) ; // if we just coalesced but the list size does not decrease, this will force capacity increase.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
331 //if ( !strcmp( alignments.GetReadId(), "ERR188021.8489052" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
332 // ct.vector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
333 // Add the mate-pair information.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
334 int mateChrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
335 int64_t matePos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
336 alignments.GetMatePosition( mateChrId, matePos ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
337 if ( alignments.GetChromId() == mateChrId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
338 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
339 if ( matePos < alignments.segments[0].a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
340 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
341 int mateIdx = mateReadIds.Query( alignments.GetReadId(), alignments.segments[0].a ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
342 if ( mateIdx != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
343 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
344 struct _matePairConstraint nm ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
345 nm.i = mateIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
346 nm.j = constraints.size() - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
347 nm.abundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
348 nm.support = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
349 nm.uniqSupport = uniqSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
350 nm.effectiveCount = 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
351 matePairs.push_back( nm ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
352 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
353 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
354 else if ( matePos > alignments.segments[0].a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
355 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
356 mateReadIds.Insert( alignments.GetReadId(), alignments.segments[0].a, constraints.size() - 1, matePos ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
357 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
358 else // two mates have the same coordinate.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
359 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
360 if ( alignments.IsFirstMate() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
361 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
362 struct _matePairConstraint nm ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
363 nm.i = constraints.size() - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
364 nm.j = constraints.size() - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
365 nm.abundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
366 nm.support = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
367 nm.uniqSupport = uniqSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
368 nm.effectiveCount = 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
369 matePairs.push_back( nm ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
370 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
371 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
372 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
373 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
374 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
375 ct.vector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
376
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
377 // Coalesce if necessary.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
378 size = constraints.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
379 if ( (int)size > coalesceThreshold && size == (int)constraints.capacity() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
380 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
381
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
382 //printf( "start coalescing. %d\n", constraints.capacity() ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
383 CoalesceSameConstraints() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
384
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
385 // Not coalesce enough
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
386 if ( constraints.size() >= constraints.capacity() / 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
387 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
388 coalesceThreshold *= 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
389 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
390 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
391 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
392 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
393 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
394 //printf( "not compatible\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
395 ct.vector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
396 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
397 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
398 //printf( "start coalescing. %d %d\n", constraints.size(), matePairs.size() ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
399 CoalesceSameConstraints() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
400 //printf( "after coalescing. %d %d\n", constraints.size(), matePairs.size() ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
401 //for ( i = 0 ; i < matePairs.size() ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
402 // printf( "matePair: %d %d %d\n", matePairs[i].i, matePairs[i].j, matePairs[i].support ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
403 // single-end data set
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
404 //if ( matePairs.size() == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
405 if ( alignments.fragStdev == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
406 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
407 int size = constraints.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
408 matePairs.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
409
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
410 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
411 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
412 struct _matePairConstraint nm ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
413 nm.i = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
414 nm.j = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
415 nm.abundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
416 nm.support = constraints[i].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
417 nm.uniqSupport = constraints[i].uniqSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
418 nm.effectiveCount = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
419
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
420 matePairs.push_back( nm ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
421 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
422 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
423
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
424 ComputeNormAbund( subexons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
425
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
426 /*for ( i = 0 ; i < constraints.size() ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
427 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
428 printf( "constraints %d: %lf %d %d %d ", i, constraints[i].normAbund, constraints[i].first, constraints[i].last, constraints[i].support ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
429 constraints[i].vector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
430 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
431
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
432 for ( i = 0 ; i < matePairs.size() ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
433 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
434 printf( "mates %d: %lf %d %d %d %d\n", i, matePairs[i].normAbund, matePairs[i].i, matePairs[i].j, matePairs[i].support, matePairs[i].uniqSupport ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
435 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
436
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
437 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
438 }