annotate PsiCLASS-1.0.2/TranscriptDecider.cpp @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
#include "TranscriptDecider.hpp"

#include <climits>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 void TranscriptDecider::OutputTranscript( int sampleId, struct _subexon *subexons, struct _transcript &transcript )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 int i, j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 // determine the strand
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 std::vector<int> subexonInd ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 transcript.seVector.GetOnesIndices( subexonInd ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 // Determine the strand
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 char strand[2] = "." ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 int size = subexonInd.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 if ( size > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 // locate the intron showed up in this transcript.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 for ( i = 0 ; i < size - 1 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 /*int nextCnt = subexons[ subexonInd[i] ].nextCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 if ( nextCnt == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 for ( j = 0 ; j < nextCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 int a = subexons[ subexonInd[i] ].next[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 if ( subexonInd[i + 1] == a
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 && subexons[ subexonInd[i] ].end + 1 < subexons[a].start ) // avoid the case like ..(...[...
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 if ( j < nextCnt )*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 if ( subexons[ subexonInd[i] ].end + 1 < subexons[ subexonInd[i + 1] ].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 if ( subexons[ subexonInd[i] ].rightStrand == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 strand[0] = '+' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 else if ( subexons[ subexonInd[i] ].rightStrand == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 strand[0] = '-' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 // TODO: transcript_id
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 char *chrom = alignments.GetChromName( subexons[0].chrId ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 char prefix[10] = "" ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 struct _subexon *catSubexons = new struct _subexon[ size + 1 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 // Concatenate adjacent subexons
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 catSubexons[0] = subexons[ subexonInd[0] ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 j = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 for ( i = 1 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 if ( subexons[ subexonInd[i] ].start == catSubexons[j - 1].end + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 catSubexons[j - 1].end = subexons[ subexonInd[i] ].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 catSubexons[j] = subexons[ subexonInd[i] ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 ++j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 size = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 int gid = GetTranscriptGeneId( subexonInd, subexons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 if ( 0 ) //numThreads <= 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 fprintf( outputFPs[sampleId], "%s\tCLASSES\ttranscript\t%d\t%d\t1000\t%s\t.\tgene_id \"%s%s.%d\"; transcript_id \"%s%s.%d.%d\"; Abundance \"%.6lf\";\n",
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 chrom, catSubexons[0].start + 1, catSubexons[size - 1].end + 1, strand,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 prefix, chrom, gid,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 prefix, chrom, gid, transcriptId[ gid - baseGeneId ], transcript.FPKM ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 fprintf( outputFPs[ sampleId ], "%s\tCLASSES\texon\t%d\t%d\t1000\t%s\t.\tgene_id \"%s%s.%d\"; "
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 "transcript_id \"%s%s.%d.%d\"; exon_number \"%d\"; Abundance \"%.6lf\"\n",
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 chrom, catSubexons[i].start + 1, catSubexons[i].end + 1, strand,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 prefix, chrom, gid,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 prefix, chrom, gid, transcriptId[ gid - baseGeneId ],
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 i + 1, transcript.FPKM ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 struct _outputTranscript t ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 int len = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 t.chrId = subexons[0].chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 t.geneId = gid ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 t.transcriptId = transcriptId[ gid - baseGeneId ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 t.FPKM = transcript.FPKM ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 t.sampleId = sampleId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 t.exons = new struct _pair32[size] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 t.exons[i].a = catSubexons[i].start + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 t.exons[i].b = catSubexons[i].end + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 len += t.exons[i].b - t.exons[i].a + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 t.cov = transcript.abundance * alignments.readLen / len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 t.ecnt = size ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 t.strand = strand[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 //printf( "%lf\n", transcript.correlationScore ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 if ( numThreads > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 outputHandler->Add( t ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 outputHandler->Add_SingleThread( t ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 ++transcriptId[ gid - baseGeneId ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 delete[] catSubexons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 int TranscriptDecider::GetFather( int f, int *father )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 if ( father[f] != f )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 return father[f] = GetFather( father[f], father ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 return f ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 int TranscriptDecider::GetTranscriptGeneId( std::vector<int> &subexonInd, struct _subexon *subexons )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 int size = subexonInd.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 if ( subexons[ subexonInd[i] ].geneId != -2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 return subexons[ subexonInd[i] ].geneId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 // Some extreme case, where all the regions are mixture regions.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 for ( i = 0 ; i < size - 1 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 if ( subexons[ subexonInd[i] ].end + 1 < subexons[ subexonInd[i + 1] ].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 return defaultGeneId[ ( subexons[ subexonInd[i] ].rightStrand + 1 ) / 2 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 return defaultGeneId[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 int TranscriptDecider::GetTranscriptGeneId( struct _transcript &t, struct _subexon *subexons )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 if ( subexons[ t.first ].geneId != -2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 return subexons[ t.first ].geneId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 if ( subexons[ t.last ].geneId != -2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 return subexons[ t.last ].geneId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 std::vector<int> subexonInd ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 t.seVector.GetOnesIndices( subexonInd ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 return GetTranscriptGeneId( subexonInd, subexons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 void TranscriptDecider::InitTranscriptId()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 for ( i = 0 ; i < usedGeneId - baseGeneId ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 transcriptId[i] = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156 bool TranscriptDecider::IsStartOfMixtureStrandRegion( int tag, struct _subexon *subexons, int seCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158 int j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 int leftStrandCnt[2] = {0, 0}, rightStrandCnt[2] = {0, 0};
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160 for ( j = tag + 1 ; j < seCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 if ( subexons[j].start > subexons[j - 1].end + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 for ( k = tag ; k < j ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166 if ( subexons[k].leftStrand != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 ++leftStrandCnt[ ( subexons[k].leftStrand + 1 ) / 2 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 if ( subexons[k].rightStrand != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 ++rightStrandCnt[ ( subexons[k].rightStrand + 1 ) / 2 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 if ( rightStrandCnt[0] > 0 && leftStrandCnt[0] == 0 && leftStrandCnt[1] > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174 if ( rightStrandCnt[1] > 0 && leftStrandCnt[1] == 0 && leftStrandCnt[0] > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 // Return 0 - uncompatible or does not overlap at all. 1 - fully compatible. 2 - Head of the constraints compatible with the tail of the transcript
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 // the partial compatible case (return 2) mostly likely happen in DP where we have partial transcript.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 int TranscriptDecider::IsConstraintInTranscript( struct _transcript transcript, struct _constraint &c )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 //printf( "%d %d, %d %d\n", c.first, c.last, transcript.first, transcript.last ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 if ( c.first < transcript.first || c.first > transcript.last
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 || !transcript.seVector.Test( c.first )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 || ( !transcript.partial && !transcript.seVector.Test( c.last ) ) ) // no overlap or starts too early or some chosen subexons does not compatible
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189 // Extract the subexons we should focus on.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 int s, e ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 s = c.first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 e = c.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193 bool returnPartial = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194 if ( e > transcript.last ) // constraints ends after the transcript.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196 if ( transcript.partial )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198 e = transcript.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199 returnPartial = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204 /*printf( "%s: %d %d: (%d %d) (%d %d)\n", __func__, s, e,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 transcript.seVector.Test(0), transcript.seVector.Test(1),
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206 c.vector.Test(0), c.vector.Test(1) ) ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
208 compatibleTestVectorT.Assign( transcript.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
209 //compatibleTestVectorT.MaskRegionOutsideInRange( s, e, transcript.first, transcript.last ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
210 compatibleTestVectorT.MaskRegionOutside( s, e ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
211
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
212 compatibleTestVectorC.Assign( c.vector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
213 if ( c.last > transcript.last )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
214 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
215 //compatibleTestVectorC.MaskRegionOutsideInRange( s, e, c.first, c.last ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
216 //compatibleTestVectorC.MaskRegionOutside( s, e ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
217 compatibleTestVectorC.MaskRegionOutside( 0, e ) ; // Because the bits before s are already all 0s in C.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
218 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
219 /*printf( "after masking %d %d. %d %d %d %d:\n", s, e, transcript.first, transcript.last, c.first, c.last ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
220 compatibleTestVectorT.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
221 compatibleTestVectorC.Print() ; */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
222 // Test compatible.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
223 int ret = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
224 if ( compatibleTestVectorT.IsEqual( compatibleTestVectorC ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
225 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
226 if ( returnPartial )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
227 ret = 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
228 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
229 ret = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
230 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
231
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
232 return ret ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
233 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
234
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
235 int TranscriptDecider::IsConstraintInTranscriptDebug( struct _transcript transcript, struct _constraint &c )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
236 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
237 //printf( "%d %d, %d %d\n", c.first, c.last, transcript.first, transcript.last ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
238 if ( c.first < transcript.first || c.first > transcript.last ) // no overlap or starts too early.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
239 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
240 printf( "hi\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
241 // Extract the subexons we should focus on.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
242 int s, e ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
243 s = c.first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
244 e = c.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
245 bool returnPartial = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
246 if ( e > transcript.last ) // constraints ends after the transcript.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
247 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
248 if ( transcript.partial )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
249 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
250 e = transcript.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
251 returnPartial = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
252 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
253 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
254 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
255 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
256 /*printf( "%s: %d %d: (%d %d) (%d %d)\n", __func__, s, e,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
257 transcript.seVector.Test(0), transcript.seVector.Test(1),
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
258 c.vector.Test(0), c.vector.Test(1) ) ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
259
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
260 compatibleTestVectorT.Assign( transcript.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
261 compatibleTestVectorT.MaskRegionOutside( s, e ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
262
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
263 compatibleTestVectorC.Assign( c.vector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
264 if ( e > transcript.last )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
265 compatibleTestVectorC.MaskRegionOutside( s, e ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
266 /*printf( "after masking: (%d %d) (%d %d)\n",
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
267 compatibleTestVectorT.Test(0), compatibleTestVectorT.Test(1),
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
268 compatibleTestVectorC.Test(0), compatibleTestVectorC.Test(1) ) ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
269
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
270 // Test compatible.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
271 int ret = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
272 if ( compatibleTestVectorT.IsEqual( compatibleTestVectorC ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
273 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
274 if ( returnPartial )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
275 ret = 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
276 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
277 ret = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
278 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
279 compatibleTestVectorT.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
280 compatibleTestVectorC.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
281 printf( "ret=%d\n", ret ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
282 return ret ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
283 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
284 int TranscriptDecider::SubTranscriptCount( int tag, struct _subexon *subexons, int *f )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
285 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
286 if ( f[tag] != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
287 return f[tag] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
288
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
289 int ret = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
290 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
291 if ( subexons[tag].canBeEnd )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
292 ret = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
293 for ( i = 0 ; i < subexons[tag].nextCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
294 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
295 ret += SubTranscriptCount( subexons[tag].next[i], subexons, f ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
296 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
297
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
298 if ( ret == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
299 ret = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
300 return f[tag] = ret ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
301 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
302
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
303 void TranscriptDecider::CoalesceSameTranscripts( std::vector<struct _transcript> &t )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
304 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
305 int i, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
306 if ( t.size() == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
307 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
308
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
309 std::sort( t.begin(), t.end(), CompSortTranscripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
310
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
311 int size = t.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
312 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
313 for ( i = 1 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
314 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
315 if ( t[k].seVector.IsEqual( t[i].seVector ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
316 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
317 t[k].abundance += t[i].abundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
318 t[i].seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
319 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
320 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
321 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
322 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
323 if ( i != k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
324 t[k] = t[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
325 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
326 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
327 t.resize( k + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
328 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
329
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
330 void TranscriptDecider::EnumerateTranscript( int tag, int strand, int visit[], int vcnt, struct _subexon *subexons, SubexonCorrelation &correlation, double correlationScore, std::vector<struct _transcript> &alltranscripts, int &atcnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
331 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
332 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
333 visit[ vcnt ] = tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
334 //printf( "%s: %d %d %d %d. %d %d\n", __func__, vcnt, tag, subexons[tag].nextCnt, strand, subexons[tag].start, subexons[tag].end ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
335 // Compute the correlation score
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
336 double minCor = correlationScore ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
337 for ( i = 0 ; i < vcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
338 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
339 double tmp = correlation.Query( visit[i], visit[vcnt] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
340 if ( tmp < minCor )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
341 minCor = tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
342 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
343
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
344 if ( subexons[tag].canBeEnd )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
345 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
346 struct _transcript &txpt = alltranscripts[atcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
347 for ( i = 0 ; i <= vcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
348 txpt.seVector.Set( visit[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
349
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
350 txpt.first = visit[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
351 txpt.last = visit[vcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
352 txpt.partial = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
353 txpt.correlationScore = minCor ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
354
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
355 //printf( "%lf %d %d ", txpt.correlationScore, vcnt, visit[0] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
356 //txpt.seVector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
357 ++atcnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
358 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
359
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
360 for ( i = 0 ; i < subexons[tag].nextCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
361 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
362 int a = subexons[tag].next[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
363 if ( !SubexonGraph::IsSameStrand( subexons[tag].rightStrand, strand )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
364 && subexons[a].start > subexons[tag].end + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
365 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
366 int backupStrand = strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
367 if ( subexons[a].start > subexons[tag].end + 1 && strand == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
368 strand = subexons[tag].rightStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
369 EnumerateTranscript( subexons[tag].next[i], strand, visit, vcnt + 1, subexons, correlation, minCor, alltranscripts, atcnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
370 strand = backupStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
371 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
372 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
373
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
374 void TranscriptDecider::SearchSubTranscript( int tag, int strand, int parents[], int pcnt, struct _dp &pdp, int visit[], int vcnt, int extends[], int extendCnt,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
375 std::vector<struct _constraint> &tc, int tcStartInd, struct _dpAttribute &attr )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
376 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
377 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
378 int size ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
379 double cover ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
380 bool keepSearch = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
381 bool belowMin = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
382
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
383 struct _subexon *subexons = attr.subexons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
384
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
385 visit[vcnt] = tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
386 ++vcnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
387 struct _dp visitdp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
388
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
389 visitdp.cover = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
390
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
391 struct _transcript &subTxpt = attr.bufferTxpt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
392 subTxpt.seVector.Reset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
393 for ( i = 0 ; i < pcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
394 subTxpt.seVector.Set( parents[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
395 subTxpt.first = parents[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
396 subTxpt.last = parents[ pcnt - 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
397 for ( i = 0 ; i < vcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
398 subTxpt.seVector.Set( visit[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
399 subTxpt.last = visit[ vcnt - 1 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
400 subTxpt.partial = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
401
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
402 // Adjust the extendsCnt
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
403 /*printf( "%s: %d %d %d\n", __func__, vcnt , extendCnt, extends[ extendCnt - 1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
404 subTxpt.seVector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
405 tc[extends[extendCnt - 1]].vector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
406 printf( "Adjust extend:\n") ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
407 for ( i = extendCnt - 1 ; i >= 0 ; --i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
408 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
409 if ( tc[ extends[i] ].last <= tag || ( tc[ extends[i] ].vector.Test( tag ) && IsConstraintInTranscript( subTxpt, tc[ extends[i] ] ) != 0 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
410 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
411 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
412 extendCnt = i + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
413
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
414 // If the extension ends.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
415 subTxpt.partial = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
416 if ( subexons[tag].nextCnt > 0 && ( extendCnt == 0 || tag >= tc[ extends[ extendCnt - 1 ] ].last ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
417 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
418 // Solve the subtranscript beginning with visit.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
419 // Now we got the optimal transcript for visit.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
420 visitdp = SolveSubTranscript( visit, vcnt, strand, tc, tcStartInd, attr ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
421 keepSearch = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
422 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
423 //printf( "%s %d %d: visitdp.cover=%lf\n", __func__, parents[0], tag, visitdp.cover ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
424
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
425 // the constraints across the parents and visit.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
426 size = tc.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
427 if ( visitdp.cover >= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
428 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
429 cover = visitdp.cover ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
430 // Reset the subTxpt, since its content is modofitied in SolveSubTxpt called above.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
431 subTxpt.seVector.Reset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
432 for ( i = 0 ; i < pcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
433 subTxpt.seVector.Set( parents[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
434 subTxpt.seVector.Or( visitdp.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
435 subTxpt.first = parents[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
436 subTxpt.last = visitdp.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
437 subTxpt.partial = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
438
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
439 if ( !attr.forAbundance && attr.minAbundance > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
440 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
441 for ( i = 0 ; i < pcnt - 1 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
442 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
443 if ( attr.uncoveredPair.find( parents[i] * attr.seCnt + parents[i + 1] ) != attr.uncoveredPair.end() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
444 belowMin = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
445 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
446 for ( i = -1 ; i < vcnt - 1 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
447 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
448 if ( i == -1 && pcnt >= 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
449 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
450 if ( attr.uncoveredPair.find( parents[pcnt - 1] * attr.seCnt + visit[0] ) != attr.uncoveredPair.end() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
451 belowMin = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
452 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
453 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
454 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
455 if ( attr.uncoveredPair.find( visit[i] * attr.seCnt + visit[i + 1] ) != attr.uncoveredPair.end() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
456 belowMin = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
457 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
458 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
459 if ( attr.forAbundance && belowMin )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
460 cover = 1e-6 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
461 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
462
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
463 for ( i = tcStartInd ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
464 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
465 if ( tc[i].first > parents[ pcnt - 1] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
466 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
467
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
468 if ( IsConstraintInTranscript( subTxpt, tc[i] ) == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
469 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
470 if ( tc[i].normAbund <= attr.minAbundance )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
471 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
472 belowMin = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
473 cover = -2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
474 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
475 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
476
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
477 if ( tc[i].abundance <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
478 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
479
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
480 if ( attr.forAbundance )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
481 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
482 if ( tc[i].normAbund < cover || cover == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
483 cover = tc[i].normAbund ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
484 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
485 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
486 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
487 ++cover ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
488 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
489 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
490 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
491 if ( belowMin && pdp.cover == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
492 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
493 pdp.cover = -2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
494 pdp.seVector.Assign( subTxpt.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
495 pdp.first = subTxpt.first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
496 pdp.last = subTxpt.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
497 pdp.strand = strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
498 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
499 else if ( cover > pdp.cover )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
500 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
501 pdp.cover = cover ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
502 pdp.seVector.Assign( subTxpt.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
503 pdp.first = subTxpt.first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
504 pdp.last = subTxpt.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
505 pdp.strand = strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
506 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
507 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
508 else if ( visitdp.cover == -2 && pdp.cover == -1 ) // no valid extension from visit
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
509 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
510 subTxpt.seVector.Reset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
511 for ( i = 0 ; i < pcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
512 subTxpt.seVector.Set( parents[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
513 subTxpt.seVector.Or( visitdp.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
514 subTxpt.first = parents[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
515 subTxpt.last = visitdp.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
516
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
517 pdp.cover = -2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
518 pdp.seVector.Assign( subTxpt.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
519 pdp.first = subTxpt.first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
520 pdp.last = subTxpt.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
521 pdp.strand = strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
522 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
523
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
524 if ( subexons[tag].canBeEnd && ( visitdp.cover < 0 || attr.forAbundance ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
525 // This works is because that the extension always covers more constraints. So we only go this branch if the extension does not work
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
526 // and it goes this branch if it violates minAbundance
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
527 // But we need to go here when we want to compute the maxAbundance transcript.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
528 // This part also works as the exit point of the recurive function.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
529 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
530 bool belowMin = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
531 subTxpt.seVector.Reset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
532 for ( i = 0 ; i < pcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
533 subTxpt.seVector.Set( parents[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
534 for ( i = 0 ; i < vcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
535 subTxpt.seVector.Set( visit[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
536 subTxpt.first = parents[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
537 subTxpt.last = visit[ vcnt - 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
538 subTxpt.partial = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
539
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
540 cover = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
541 if ( attr.forAbundance || attr.minAbundance > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
542 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
543 for ( i = 0 ; i < pcnt - 1 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
544 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
545 if ( attr.uncoveredPair.find( parents[i] * attr.seCnt + parents[i + 1] ) != attr.uncoveredPair.end() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
546 belowMin = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
547 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
548 for ( i = -1 ; i < vcnt - 1 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
549 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
550 if ( i == -1 && pcnt >= 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
551 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
552 if ( attr.uncoveredPair.find( parents[pcnt - 1] * attr.seCnt + visit[0] ) != attr.uncoveredPair.end() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
553 belowMin = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
554 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
555 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
556 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
557 if ( attr.uncoveredPair.find( visit[i] * attr.seCnt + visit[i + 1] ) != attr.uncoveredPair.end() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
558 belowMin = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
559 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
560 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
561
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
562 //if ( belowMin == true )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
563 // printf( "turned belowMin. %d. %d %d: %d %d %d\n", attr.uncoveredPair.size(), pcnt, vcnt, parents[0], visit[0], visit[ vcnt - 1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
564
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
565 if ( attr.forAbundance && belowMin )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
566 cover = 1e-6 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
567 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
568
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
569 for ( i = tcStartInd ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
570 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
571 // note that the value is parents[ pcnt - 1], because
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
572 // in above the part of "visit" is computed in SolveSubTranscript( visit ).
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
573 if ( tc[i].first > visit[ vcnt - 1] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
574 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
575 if ( IsConstraintInTranscript( subTxpt, tc[i] ) == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
576 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
577 if ( tc[i].normAbund <= attr.minAbundance )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
578 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
579 belowMin = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
580 cover = -2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
581 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
582 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
583
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
584 if ( tc[i].abundance <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
585 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
586 if ( attr.forAbundance )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
587 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
588 if ( tc[i].normAbund < cover || cover == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
589 cover = tc[i].normAbund ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
590 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
591 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
592 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
593 ++cover ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
594 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
595 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
596 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
597
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
598 if ( belowMin && pdp.cover == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
599 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
600 pdp.cover = -2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
601 pdp.seVector.Assign( subTxpt.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
602 pdp.first = subTxpt.first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
603 pdp.last = subTxpt.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
604 pdp.strand = strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
605 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
606 else if ( cover > pdp.cover )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
607 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
608 pdp.cover = cover ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
609 pdp.seVector.Assign( subTxpt.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
610 pdp.first = subTxpt.first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
611 pdp.last = subTxpt.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
612 pdp.strand = strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
613 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
614 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
615 //printf( "%s %d: pdp.cover=%lf\n", __func__, tag, pdp.cover ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
616
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
617 // keep searching.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
618 if ( keepSearch )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
619 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
620 for ( i = 0 ; i < subexons[tag].nextCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
621 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
622 int b = subexons[tag].next[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
623 if ( ( SubexonGraph::IsSameStrand( subexons[tag].rightStrand, strand )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
624 && SubexonGraph::IsSameStrand( subexons[b].leftStrand, strand ) ) ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
625 subexons[b].start == subexons[tag].end + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
626 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
627 int backupStrand = strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
628 if ( subexons[b].start > subexons[tag].end + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
629 strand = subexons[tag].rightStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
630
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
631 SearchSubTranscript( subexons[tag].next[i], strand, parents, pcnt, pdp, visit, vcnt,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
632 extends, extendCnt, tc, tcStartInd, attr ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
633 strand = backupStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
634 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
635 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
636
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
637 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
638
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
639 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
640 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
641
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
642 struct _dp TranscriptDecider::SolveSubTranscript( int visit[], int vcnt, int strand, std::vector<struct _constraint> &tc, int tcStartInd, struct _dpAttribute &attr )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
643 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
644 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
645 int size ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
646 /*printf( "%s: ", __func__ ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
647 for ( i = 0 ; i < vcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
648 printf( "%d ", visit[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
649 printf( ": %lf %d %d", attr.f1[ visit[0] ].cover, attr.f1[ visit[0] ].timeStamp, attr.timeStamp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
650 printf( "\n" ) ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
651 // Test whether it is stored in dp
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
652 if ( vcnt == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
653 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
654 if ( attr.f1[ visit[0] ].cover != -1 && attr.f1[ visit[0] ].strand == strand && ( attr.f1[ visit[0] ].timeStamp == attr.timeStamp ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
655 ( attr.f1[ visit[0] ].minAbundance < attr.minAbundance && attr.f1[visit[0]].cover == -2 ) ) ) //even given lower minAbundance threshold, it fails
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
656 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
657 return attr.f1[ visit[0] ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
658 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
659 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
660 else if ( vcnt == 2 && attr.f2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
661 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
662 int a = visit[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
663 int b = visit[1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
664
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
665 if ( attr.f2[a][b].cover != -1 && attr.f2[a][b].strand == strand && ( attr.f2[a][b].timeStamp == attr.timeStamp ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
666 ( attr.f2[a][b].minAbundance < attr.minAbundance && attr.f2[a][b].cover == -2 ) ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
667 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
668 return attr.f2[a][b] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
669 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
670 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
671 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
672 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
673 int key = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
674 for ( i = 0 ; i < vcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
675 key = ( key * attr.seCnt + visit[i] ) % hashMax ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
676 if ( key < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
677 key += hashMax ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
678
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
679 if ( attr.hash[key].cover != -1 && attr.hash[key].cnt == vcnt && attr.hash[key].strand == strand &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
680 ( attr.hash[key].first == visit[0] ) &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
681 ( attr.hash[key].timeStamp == attr.timeStamp ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
682 ( attr.hash[key].minAbundance < attr.minAbundance && attr.hash[key].cover == -2 ) ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
683 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
684 struct _transcript subTxpt = attr.bufferTxpt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
685 subTxpt.seVector.Reset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
686 for ( i = 0 ; i < vcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
687 subTxpt.seVector.Set( visit[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
688 //subTxpt.seVector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
689 //attr.hash[key].seVector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
690 subTxpt.seVector.Xor( attr.hash[key].seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
691 subTxpt.seVector.MaskRegionOutside( visit[0], visit[ vcnt - 1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
692 //printf( "hash test: %d %d\n", key, subTxpt.seVector.IsAllZero() ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
693 if ( subTxpt.seVector.IsAllZero() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
694 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
695 return attr.hash[key] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
696 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
697
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
698 // Can't use the code below, because vcnt is the header of subexons.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
699 /*for ( i = 0 ; i < vcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
700 if ( !attr.hash[key].seVector.Test( visit[i] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
701 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
702 if ( i >= vcnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
703 return attr.hash[key] ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
704
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
705 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
706 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
707 // adjust tcStartInd
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
708 size = tc.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
709 for ( i = tcStartInd ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
710 if ( tc[i].first >= visit[0] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
711 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
712 tcStartInd = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
713
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
714
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
715 struct _subexon *subexons = attr.subexons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
716 struct _dp visitdp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
717 visitdp.seVector.Init( attr.seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
718 visitdp.cover = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
719
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
720 struct _transcript &subTxpt = attr.bufferTxpt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
721 // This happens when it is called from PickTranscriptsByDP, the first subexon might be the end.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
722 subTxpt.seVector.Reset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
723 for ( i = 0 ; i < vcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
724 subTxpt.seVector.Set( visit[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
725 subTxpt.first = visit[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
726 subTxpt.last = visit[vcnt - 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
727
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
728 if ( subexons[ visit[vcnt - 1] ].canBeEnd )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
729 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
730 subTxpt.partial = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
731 double cover = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
732 for ( i = tcStartInd ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
733 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
734 if ( tc[i].first > subTxpt.last )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
735 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
736
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
737 if ( IsConstraintInTranscript( subTxpt, tc[i] ) == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
738 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
739 if ( tc[i].normAbund <= attr.minAbundance )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
740 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
741 cover = -2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
742 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
743 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
744
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
745 if ( tc[i].abundance <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
746 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
747 if ( attr.forAbundance )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
748 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
749 if ( tc[i].normAbund < cover || cover == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
750 cover = tc[i].normAbund ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
751 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
752 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
753 ++cover ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
754 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
755 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
756
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
757 visitdp.seVector.Assign( subTxpt.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
758 visitdp.cover = cover ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
759 visitdp.first = subTxpt.first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
760 visitdp.last = subTxpt.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
761 visitdp.strand = strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
762 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
763
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
764 // Now we extend.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
765 size = tc.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
766 int *extends = new int[tc.size() - tcStartInd + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
767 int extendCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
768 subTxpt.partial = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
769 for ( i = tcStartInd ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
770 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
771 if ( tc[i].first > subTxpt.last )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
772 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
773 if ( IsConstraintInTranscript( subTxpt, tc[i] ) == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
774 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
775 extends[extendCnt] = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
776 ++extendCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
777 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
778 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
779
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
780 // Sort the extend by the index of the last subexon.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
781 if ( extendCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
782 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
783 struct _pair32 *extendsPairs = new struct _pair32[extendCnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
784
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
785 for ( i = 0 ; i < extendCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
786 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
787 extendsPairs[i].a = extends[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
788 extendsPairs[i].b = tc[ extends[i] ].last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
789 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
790 qsort( extendsPairs, extendCnt, sizeof( struct _pair32 ), CompPairsByB ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
791
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
792 for ( i = 0 ; i < extendCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
793 extends[i] = extendsPairs[i].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
794
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
795 delete[] extendsPairs ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
796 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
797
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
798 size = subexons[ visit[vcnt - 1] ].nextCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
799 int nextvCnt = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
800 if ( extendCnt > 0 && tc[ extends[ extendCnt - 1 ] ].last - visit[ vcnt - 1 ] > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
801 nextvCnt = tc[ extends[ extendCnt - 1 ] ].last - visit[ vcnt - 1 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
802 int *nextv = new int[ nextvCnt ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
803 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
804 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
805 int a = visit[vcnt - 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
806 int b = subexons[a].next[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
807 if ( ( SubexonGraph::IsSameStrand( subexons[a].rightStrand, strand )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
808 && SubexonGraph::IsSameStrand( subexons[b].leftStrand, strand ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
809 ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
810 subexons[b].start == subexons[a].end + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
811 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
812 int backupStrand = strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
813 if ( subexons[b].start > subexons[a].end + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
814 strand = subexons[a].rightStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
815 SearchSubTranscript( subexons[ visit[vcnt - 1] ].next[i], strand, visit, vcnt, visitdp, nextv, 0, extends, extendCnt, tc, tcStartInd, attr ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
816 strand = backupStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
817
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
818 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
819 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
820 //printf( "%s %d(%d) %d %d %d: %lf\n", __func__, visit[0], subexons[ visit[vcnt - 1] ].canBeEnd, size, extendCnt, strand, visitdp.cover ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
821 delete[] nextv ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
822 delete[] extends ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
823
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
824 // store the result in the dp structure.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
825 // We return the structure stored in dp to simplify the memory access pattern.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
826 // In other words, we assume the structure returned from this function always uses the memory from attr.dp
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
827 if ( vcnt == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
828 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
829 SetDpContent( attr.f1[ visit[0] ], visitdp, attr ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
830 visitdp.seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
831 return attr.f1[ visit[0] ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
832 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
833 else if ( vcnt == 2 && attr.f2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
834 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
835 SetDpContent( attr.f2[ visit[0] ][ visit[1] ], visitdp, attr ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
836 visitdp.seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
837 return attr.f2[ visit[0] ][ visit[1] ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
838 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
839 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
840 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
841 int key = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
842 for ( i = 0 ; i < vcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
843 key = ( key * attr.seCnt + visit[i] ) % hashMax ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
844 if ( key < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
845 key += hashMax ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
846
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
847 //static int hashUsed = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
848 //if ( attr.hash[key].cover == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
849 // ++hashUsed ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
850 //printf( "%d/%d\n", hashUsed, HASH_MAX) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
851 //printf( "hash write: %d\n", key ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
852 SetDpContent( attr.hash[key], visitdp, attr ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
853 attr.hash[key].cnt = vcnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
854 visitdp.seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
855 return attr.hash[key] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
856 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
857 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
858
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
859
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
860 void TranscriptDecider::PickTranscriptsByDP( struct _subexon *subexons, int seCnt, int iterBound, Constraints &constraints, SubexonCorrelation &correlation, struct _dpAttribute &attr, std::vector<struct _transcript> &alltranscripts )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
861 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
862 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
863
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
864 std::vector<struct _transcript> transcripts ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
865 std::vector<struct _constraint> &tc = constraints.constraints ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
866 int tcCnt = tc.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
867 int coalesceThreshold = 1024 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
868
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
869 //printf( "tcCnt=%d\n", tcCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
870
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
871 attr.timeStamp = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
872 attr.bufferTxpt.seVector.Init( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
873 attr.subexons = subexons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
874 attr.seCnt = seCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
875
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
876 double maxAbundance = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
877 // Initialize the dp data structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
878 /*memset( attr.f1, -1, sizeof( struct _dp ) * seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
879 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
880 memset( attr.f2[i], -1, sizeof( struct _dp ) * seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
881 memset( attr.hash, -1, sizeof( struct _dp ) * HASH_MAX ) ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
882 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
883 ResetDpContent( attr.f1[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
884 for ( i = 0 ; i < seCnt && attr.f2 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
885 for ( j = i ; j < seCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
886 ResetDpContent( attr.f2[i][j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
887 for ( i = 0 ; i < hashMax ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
888 ResetDpContent( attr.hash[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
889
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
890 // Set the uncovered pair
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
891 attr.uncoveredPair.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
892 BitTable bufferTable( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
893 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
894 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
895 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
896 for ( ; k < tcCnt ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
897 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
898 if ( tc[k].last >= i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
899 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
900 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
901
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
902 if ( k >= tcCnt || tc[k].first > i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
903 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
904 for ( j = 0 ; j < subexons[i].nextCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
905 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
906 attr.uncoveredPair[i * seCnt + subexons[i].next[j] ] = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
907 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
908 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
909 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
910
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
911 for ( j = 0 ; j < subexons[i].nextCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
912 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
913 bool covered = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
914 int l, n ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
915
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
916 n = subexons[i].next[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
917 for ( l = k ; l < tcCnt ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
918 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
919 if ( tc[l].first > i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
920 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
921 if ( tc[l].vector.Test( i ) && tc[l].vector.Test( n ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
922 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
923 if ( n == i + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
924 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
925 covered = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
926 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
927 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
928 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
929 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
930 bufferTable.Assign( tc[l].vector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
931 bufferTable.MaskRegionOutside( i + 1, n - 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
932 if ( bufferTable.IsAllZero() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
933 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
934 covered = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
935 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
936 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
937 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
938 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
939 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
940
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
941 if ( !covered )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
942 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
943 //printf( "set!: (%d: %d %d) (%d: %d %d)\n", i, subexons[i].start, subexons[i].end, n, subexons[n].start, subexons[n].end ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
944 attr.uncoveredPair[ i * seCnt + n ] = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
945 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
946 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
947 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
948 bufferTable.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
949
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
950
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
951 // Find the max abundance
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
952 attr.forAbundance = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
953 attr.minAbundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
954 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
955 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
956 if ( subexons[i].canBeStart )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
957 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
958 int visit[1] = {i} ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
959 struct _dp tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
960
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
961 tmp = SolveSubTranscript( visit, 1, 0, tc, 0, attr ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
962
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
963 if ( tmp.cover > maxAbundance )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
964 maxAbundance = tmp.cover ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
965 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
966 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
967 //PrintLog( "maxAbundance=%lf", maxAbundance ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
968 //exit( 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
969
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
970 // Pick the transcripts. Quantitative Set-Cover
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
971 // Notice that by the logic in SearchSubTxpt and SolveSubTxpt, we don't need to reinitialize the data structure.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
972 attr.forAbundance = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
973 int *coveredTc = new int[tcCnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
974 int coveredTcCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
975 struct _dp maxCoverDp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
976 struct _dp bestDp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
977 std::map<double, struct _dp> cachedCoverResult ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
978
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
979 maxCoverDp.seVector.Init( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
980 bestDp.seVector.Init( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
981 int iterCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
982
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
983 while ( 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
984 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
985 double bestScore ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
986
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
987 // iteratively assign constraints
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
988 attr.minAbundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
989
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
990 // Find the best candidate transcript.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
991 bestDp.cover = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
992 bestScore = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
993 while ( 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
994 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
995 // iterate the change of minAbundance
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
996 if ( cachedCoverResult.find( attr.minAbundance ) != cachedCoverResult.end() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
997 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
998 struct _dp tmp = cachedCoverResult[ attr.minAbundance ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
999 SetDpContent( maxCoverDp, tmp, attr ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1000 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1001 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1002 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1003 maxCoverDp.cover = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1004 ++attr.timeStamp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1005 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1006 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1007 if ( subexons[i].canBeStart == false )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1008 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1009 int visit[1] = {i} ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1010 struct _dp tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1011 tmp = SolveSubTranscript( visit, 1, 0, tc, 0, attr ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1012
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1013 if ( tmp.cover > maxCoverDp.cover && tmp.cover > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1014 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1015 SetDpContent( maxCoverDp, tmp, attr ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1016 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1017 //if ( subexons[i].start == 6870264 || subexons[i].start == 6872237 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1018 // printf( "%d: %lf\n", i, tmp.cover ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1019 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1020
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1021 if ( maxCoverDp.cover == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1022 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1023 struct _dp ccr ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1024 ccr.seVector.Init( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1025 SetDpContent( ccr, maxCoverDp, attr ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1026 cachedCoverResult[ attr.minAbundance ] = ccr ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1027 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1028 // the abundance for the max cover txpt.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1029 double min = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1030 struct _transcript &subTxpt = attr.bufferTxpt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1031 subTxpt.seVector.Assign( maxCoverDp.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1032 subTxpt.first = maxCoverDp.first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1033 subTxpt.last = maxCoverDp.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1034
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1035 for ( i = 0 ; i < tcCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1036 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1037 if ( IsConstraintInTranscript( subTxpt, tc[i] ) == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1038 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1039 if ( tc[i].normAbund < min || min == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1040 min = tc[i].normAbund ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1041 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1042 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1043
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1044 if ( attr.minAbundance == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1045 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1046 std::vector<int> subexonIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1047 maxCoverDp.seVector.GetOnesIndices( subexonIdx ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1048 int size = subexonIdx.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1049 for ( i = 0 ; i < size - 1 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1050 if ( attr.uncoveredPair.find( subexonIdx[i] * seCnt + subexonIdx[i + 1] ) != attr.uncoveredPair.end() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1051 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1052 min = 1e-6 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1053 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1054 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1055 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1056
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1057 double score = ComputeScore( maxCoverDp.cover, 1.0, min, maxAbundance, 0 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1058 if ( bestScore == -1 || score > bestScore )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1059 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1060 bestScore = score ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1061 SetDpContent( bestDp, maxCoverDp, attr ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1062 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1063 else if ( score < bestScore )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1064 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1065 if ( ComputeScore( maxCoverDp.cover, 1.0, maxAbundance, maxAbundance, 0 ) < bestScore )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1066 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1067 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1068 //PrintLog( "normAbund=%lf maxCoverDp.cover=%lf score=%lf timeStamp=%d", min, maxCoverDp.cover, score, attr.timeStamp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1069 attr.minAbundance = min ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1070 } // end of iteration for minAbundance.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1071
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1072 if ( bestDp.cover == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1073 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1074 // Assign the constraints.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1075 coveredTcCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1076 double update = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1077 struct _transcript &subTxpt = attr.bufferTxpt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1078 subTxpt.seVector.Assign( bestDp.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1079 subTxpt.first = bestDp.first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1080 subTxpt.last = bestDp.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1081 subTxpt.partial = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1082 for ( i = 0 ; i < tcCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1083 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1084 if ( IsConstraintInTranscript( subTxpt, tc[i] ) == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1085 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1086 if ( tc[i].abundance > 0 &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1087 ( tc[i].abundance < update || update == -1 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1088 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1089 update = tc[i].abundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1090 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1091 coveredTc[ coveredTcCnt ] = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1092 ++coveredTcCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1093 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1094 /*else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1095 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1096 printf( "%d: ", i ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1097 tc[i].vector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1098 if ( i == 127 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1099 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1100 printf( "begin debug:\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1101 IsConstraintInTranscriptDebug( subTxpt, tc[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1102 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1103 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1104 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1105 update *= ( 1 + iterCnt / 50 ) ;//* ( 1 + iterCnt / 50 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1106
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1107 //PrintLog( "%d: update=%lf %d %d. %d %d %d", iterCnt, update, coveredTcCnt, tcCnt,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1108 // bestDp.first, bestDp.last, subexons[ bestDp.first ].start ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1109 //bestDp.seVector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1110
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1111 struct _transcript nt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1112 nt.seVector.Duplicate( bestDp.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1113 nt.first = bestDp.first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1114 nt.last = bestDp.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1115 nt.partial = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1116 nt.abundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1117 for ( i = 0 ; i < coveredTcCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1118 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1119 j = coveredTc[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1120 if ( tc[j].abundance > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1121 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1122 double tmp = ( tc[j].abundance > update ? update : tc[j].abundance ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1123 tc[j].abundance -= tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1124 double factor = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1125
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1126 nt.abundance += ( tc[j].support * update / tc[j].normAbund * factor ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1127
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1128 if ( tc[j].abundance <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1129 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1130 std::vector<double> removeKey ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1131 for ( std::map<double, struct _dp>::iterator it = cachedCoverResult.begin() ; it != cachedCoverResult.end() ; ++it )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1132 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1133 subTxpt.seVector.Assign( it->second.seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1134 subTxpt.first = it->second.first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1135 subTxpt.last = it->second.last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1136 subTxpt.partial = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1137 if ( IsConstraintInTranscript( subTxpt, tc[j] ) == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1138 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1139 it->second.seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1140 removeKey.push_back( it->first ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1141 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1142 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1143 int size = removeKey.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1144 int l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1145 for ( l = 0 ; l < size ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1146 cachedCoverResult.erase( removeKey[l] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1147 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1148 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1149
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1150 if ( tc[j].abundance < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1151 tc[j].abundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1152 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1153
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1154 transcripts.push_back( nt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1155 if ( transcripts.size() >= transcripts.capacity() && (int)transcripts.size() >= coalesceThreshold )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1156 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1157 CoalesceSameTranscripts( transcripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1158 if ( transcripts.size() >= transcripts.capacity() / 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1159 coalesceThreshold *= 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1160 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1161 ++iterCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1162
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1163 if ( iterCnt >= iterBound )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1164 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1165 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1166 CoalesceSameTranscripts( transcripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1167 int size = transcripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1168 // Compute the correlation score
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1169 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1170 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1171 std::vector<int> subexonInd ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1172 transcripts[i].seVector.GetOnesIndices( subexonInd ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1173 double cor = 2.0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1174 int s = subexonInd.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1175 for ( j = 0 ; j < s ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1176 for ( k = j + 1 ; k < s ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1177 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1178 double tmp = correlation.Query( subexonInd[j], subexonInd[k] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1179 if ( tmp < cor )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1180 cor = tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1181 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1182 if ( cor > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1183 cor = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1184 transcripts[i].correlationScore = cor ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1185 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1186
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1187 // store the result
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1188 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1189 alltranscripts.push_back( transcripts[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1190
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1191 // Release the memory
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1192 for ( std::map<double, struct _dp>::iterator it = cachedCoverResult.begin() ; it != cachedCoverResult.end() ; ++it )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1193 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1194 it->second.seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1195 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1196 attr.bufferTxpt.seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1197
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1198 delete[] coveredTc ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1199 maxCoverDp.seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1200 bestDp.seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1201 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1202
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1203
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1204 // Add the prefix/suffix of transcripts to the list
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1205 void TranscriptDecider::AugmentTranscripts( struct _subexon *subexons, std::vector<struct _transcript> &alltranscripts, int limit, bool extend )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1206 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1207 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1208 int size = alltranscripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1209 if ( size >= limit )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1210 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1211
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1212 // Augment suffix, prefix transcripts
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1213 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1214 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1215 std::vector<int> subexonIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1216 alltranscripts[i].seVector.GetOnesIndices( subexonIdx ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1217 int seIdxCnt = subexonIdx.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1218 // suffix
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1219 for ( j = 1 ; j < seIdxCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1220 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1221 if ( subexons[ subexonIdx[j] ].canBeStart )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1222 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1223 struct _transcript nt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1224 nt.first = subexonIdx[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1225 nt.last = alltranscripts[i].last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1226 nt.seVector.Duplicate( alltranscripts[i].seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1227 nt.seVector.MaskRegionOutside( nt.first, nt.last ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1228 nt.partial = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1229 nt.correlationScore = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1230 nt.abundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1231 nt.constraintsSupport = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1232
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1233 alltranscripts.push_back( nt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1234 if ( alltranscripts.size() >= limit )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1235 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1236 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1237 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1238
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1239 // prefix
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1240 for ( j = 0 ; j < seIdxCnt - 1 ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1241 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1242 if ( subexons[ subexonIdx[j] ].canBeEnd )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1243 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1244 struct _transcript nt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1245 nt.first = alltranscripts[i].first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1246 nt.last = subexonIdx[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1247 nt.seVector.Duplicate( alltranscripts[i].seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1248 nt.seVector.MaskRegionOutside( nt.first, nt.last ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1249 nt.partial = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1250 nt.correlationScore = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1251 nt.abundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1252 nt.constraintsSupport = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1253
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1254 alltranscripts.push_back( nt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1255 if ( alltranscripts.size() >= limit )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1256 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1257 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1258 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1259
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1260 if ( extend )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1261 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1262 //Extentions right.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1263 for ( j = 0 ; j < seIdxCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1264 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1265 if ( subexons[ subexonIdx[j] ].nextCnt > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1266 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1267 for ( k = 0 ; k < subexons[ subexonIdx[j] ].nextCnt ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1268 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1269 int idx = subexons[ subexonIdx[j] ].next[k] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1270
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1271 if ( alltranscripts[i].seVector.Test( idx ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1272 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1273 int l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1274 std::vector<int> visited ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1275 while ( 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1276 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1277 if ( subexons[idx].nextCnt > 1 || subexons[idx].prevCnt > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1278 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1279 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1280 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1281
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1282 visited.push_back( idx ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1283 if ( subexons[idx].canBeEnd && subexons[idx].nextCnt == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1284 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1285 struct _transcript nt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1286 nt.first = alltranscripts[i].first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1287 nt.last = idx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1288 nt.seVector.Duplicate( alltranscripts[i].seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1289 nt.seVector.MaskRegionOutside( nt.first, subexonIdx[j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1290 int visitedSize = visited.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1291 for ( l = 0 ; l < visitedSize ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1292 nt.seVector.Set( visited[l] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1293 nt.partial = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1294 nt.correlationScore = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1295 nt.abundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1296 nt.constraintsSupport = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1297
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1298 alltranscripts.push_back( nt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1299 if ( alltranscripts.size() >= limit )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1300 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1301 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1302
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1303 if ( subexons[idx].nextCnt == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1304 idx = subexons[idx].next[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1305 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1306 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1307 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1308 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1309 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1310 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1311
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1312 // Extension towards left
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1313 for ( j = 0 ; j < seIdxCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1314 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1315 if ( subexons[ subexonIdx[j] ].prevCnt > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1316 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1317 for ( k = 0 ; k < subexons[ subexonIdx[j] ].prevCnt ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1318 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1319 int idx = subexons[ subexonIdx[j] ].prev[k] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1320
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1321 if ( alltranscripts[i].seVector.Test( idx ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1322 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1323 int l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1324 std::vector<int> visited ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1325 while ( 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1326 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1327 if ( subexons[idx].nextCnt > 1 || subexons[idx].prevCnt > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1328 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1329 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1330 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1331
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1332 visited.push_back( idx ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1333 if ( subexons[idx].canBeStart && subexons[idx].prevCnt == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1334 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1335 struct _transcript nt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1336 nt.first = idx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1337 nt.last = alltranscripts[i].last ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1338 nt.seVector.Duplicate( alltranscripts[i].seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1339 nt.seVector.MaskRegionOutside( subexonIdx[j], nt.last ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1340 int visitedSize = visited.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1341 for ( l = 0 ; l < visitedSize ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1342 nt.seVector.Set( visited[l] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1343 nt.partial = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1344 nt.correlationScore = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1345 nt.abundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1346 nt.constraintsSupport = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1347
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1348 alltranscripts.push_back( nt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1349 if ( alltranscripts.size() >= limit )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1350 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1351 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1352
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1353 if ( subexons[idx].prevCnt == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1354 idx = subexons[idx].prev[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1355 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1356 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1357 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1358 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1359 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1360 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1361 } // for if-extend
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1362 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1363
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1364 CoalesceSameTranscripts( alltranscripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1365 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1366
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1367 // Pick the transcripts from given transcripts.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1368 void TranscriptDecider::PickTranscripts( struct _subexon *subexons, std::vector<struct _transcript> &alltranscripts, Constraints &constraints,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1369 SubexonCorrelation &seCorrelation, std::vector<struct _transcript> &transcripts )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1370 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1371 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1372 std::vector<int> chosen ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1373 std::vector<struct _matePairConstraint> &tc = constraints.matePairs ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1374 int atcnt = alltranscripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1375 int tcCnt = tc.size() ; // transcript constraints
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1376 int seCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1377
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1378 if ( tcCnt == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1379 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1380 if ( atcnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1381 seCnt = alltranscripts[0].seVector.GetSize() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1382 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1383 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1384
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1385 double inf = -1 ; // infinity
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1386 int coalesceThreshold = 1024 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1387 int *transcriptSeCnt = new int[ atcnt ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1388 int *transcriptLength = new int[atcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1389 double *transcriptAbundance = new double[atcnt] ; // the roughly estimated abundance based on constraints.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1390 double *avgTranscriptAbundance = new double[atcnt] ; // the average normAbund from the compatible constraints.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1391
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1392 BitTable *btable = new BitTable[ atcnt ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1393 //BitTable lowCovSubexon ; // force the abundance to 0 for the transcript contains the subexon.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1394 double *coveredPortion = new double[atcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1395
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1396 memset( avgTranscriptAbundance, 0 ,sizeof( double ) * atcnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1397 for ( i = 0 ; i < atcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1398 btable[i].Init( tcCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1399 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1400 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1401 int a = constraints.matePairs[j].i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1402 int b = constraints.matePairs[j].j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1403
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1404 if ( constraints.constraints[a].support > inf )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1405 inf = constraints.constraints[a].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1406 if ( constraints.constraints[b].support > inf )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1407 inf = constraints.constraints[b].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1408
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1409 if ( tc[j].normAbund > inf )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1410 inf = tc[j].normAbund ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1411
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1412 tc[j].abundance = tc[j].normAbund ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1413 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1414 ++inf ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1415 bool btableSet = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1416 for ( i = 0 ; i < atcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1417 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1418 //printf( "correlation %d: %lf\n", i, alltranscripts[i].correlationScore ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1419 /*for ( int l = 0 ; l < subexonInd.size() ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1420 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1421 for ( int m = l ; m < subexonInd.size() ; ++m )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1422 printf( "%lf ", seCorrelation.Query( l, m ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1423 printf( "\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1424 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1425
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1426 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1427 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1428 int a = tc[j].i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1429 int b = tc[j].j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1430
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1431 //printf( "try set btble[ %d ].Set( %d ): %d %d\n", i, j, a, b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1432 //alltranscripts[i].seVector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1433 //constraints.constraints[a].vector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1434 //constraints.constraints[b].vector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1435 if ( IsConstraintInTranscript( alltranscripts[i], constraints.constraints[a] ) == 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1436 && IsConstraintInTranscript( alltranscripts[i], constraints.constraints[b] ) == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1437 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1438 //printf( "set btble[ %d ].Set( %d ): %d %d\n", i, j, a, b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1439 btable[i].Set( j ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1440 btableSet = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1441 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1442 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1443 transcriptSeCnt[i] = alltranscripts[i].seVector.Count() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1444 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1445 if ( btableSet == false )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1446 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1447 for ( i = 0 ; i < atcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1448 btable[i].Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1449 delete[] btable ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1450 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1451 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1452
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1453 double maxAbundance = -1 ; // The abundance of the most-abundant transcript
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1454 double *adjustScore = new double[atcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1455 memset( adjustScore, 0, sizeof( double ) * atcnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1456 if ( atcnt > 0 /*&& alltranscripts[0].abundance == -1*/ )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1457 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1458 struct _pair32 *chain = new struct _pair32[seCnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1459 bool *covered = new bool[seCnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1460 bool *usedConstraints = new bool[constraints.constraints.size() ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1461 std::vector<BitTable> togetherChain ; // those subexons is more likely to show up in the same transcript, like an IR with overhang, should be together to represent a 3'/5'-end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1462
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1463 /*lowCovSubexon.Init( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1464 double *avgDepth = new double[seCnt ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1465
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1466 memset( avgDepth, 0, sizeof( double ) * seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1467 int size = constraints.constraints.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1468 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1469 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1470 std::vector<int> subexonIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1471 constraints.constraints[i].GetOnesIndices( subexonIdx ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1472 int seIdxCnt = subexonidx.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1473 for ( j = 0 ; j < seIdxCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1474 avgDepth[ subexonidx[j] ] += constraints.constraints[i].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1475 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1476 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1477 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1478 if ( avgDepth[i] * alignments.readLen / (double)( subexons[i].end - subexons[i].start + 1 ) < 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1479 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1480
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1481 struct _pair32 firstRegion, lastRegion ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1482
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1483 for ( i = 0 ; i < seCnt ; )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1484 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1485 for ( j = i + 1 ; j < seCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1486 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1487 if ( subexons[j].start > subexons[j - 1].end + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1488 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1489 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1490
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1491
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1492 int cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1493 for ( k = i ; k < j ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1494 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1495 if ( ( subexons[k].leftType == 2 && subexons[k].rightType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1496 || ( subexons[k].leftType == 0 && subexons[k].rightType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1497 || ( subexons[k].leftType == 2 && subexons[k].rightType == 0 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1498 ++cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1499 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1500
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1501 if ( cnt <= 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1502 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1503 i = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1504 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1505 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1506
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1507 BitTable tmpTable( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1508 for ( k = i ; k < j ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1509 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1510 if ( ( subexons[k].leftType == 2 && subexons[k].rightType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1511 || ( subexons[k].leftType == 0 && subexons[k].rightType == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1512 || ( subexons[k].leftType == 2 && subexons[k].rightType == 0 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1513 tmpTable.Set( k ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1514 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1515 togetherChain.push_back( tmpTable ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1516 i = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1517 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1518
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1519 for ( i = 0 ; i < atcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1520 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1521 double value = inf ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1522 int tag = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1523
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1524 alltranscripts[i].abundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1525 alltranscripts[i].constraintsSupport = new double[tcCnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1526
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1527 std::vector<int> subexonIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1528 alltranscripts[i].seVector.GetOnesIndices( subexonIdx ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1529 int seIdxCnt = subexonIdx.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1530 transcriptLength[i] = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1531
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1532 firstRegion.a = subexonIdx[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1533 for ( j = 1 ; j < seIdxCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1534 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1535 if ( subexons[ subexonIdx[j] ].start > subexons[ subexonIdx[j - 1] ].end + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1536 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1537 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1538 firstRegion.b = subexonIdx[j - 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1539 lastRegion.b = subexonIdx[ seIdxCnt - 1 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1540 for ( j = seIdxCnt - 2 ; j >= 0 ; --j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1541 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1542 if ( subexons[ subexonIdx[j] ].end < subexons[ subexonIdx[j + 1] ].start - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1543 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1544 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1545 lastRegion.a = subexonIdx[j + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1546
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1547 for ( j = 0 ; j < seIdxCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1548 transcriptLength[i] += subexons[ subexonIdx[j] ].end - subexons[ subexonIdx[j] ].start + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1549
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1550 //for ( j = firstRegion.b ; j < lastRegion.a ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1551 for ( j = 0 ; j < seIdxCnt - 1 ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1552 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1553 chain[j].a = subexonIdx[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1554 chain[j].b = subexonIdx[j + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1555 covered[j] = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1556 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1557 memset( usedConstraints, false, sizeof( bool ) * constraints.constraints.size() ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1558 int compatibleCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1559 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1560 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1561 alltranscripts[i].constraintsSupport[j] = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1562 if ( btable[i].Test(j) && tc[j].abundance > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1563 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1564 ++compatibleCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1565 double adjustAbundance = tc[j].abundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1566 if ( seIdxCnt > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1567 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1568 if ( tc[j].i == tc[j].j
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1569 && ( constraints.constraints[ tc[j].i ].first +
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1570 constraints.constraints[ tc[j].i ].last == 2 * alltranscripts[i].first
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1571 || constraints.constraints[ tc[j].i ].first +
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1572 constraints.constraints[ tc[j].i ].last == 2 * alltranscripts[i].last ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1573 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1574 adjustAbundance = inf ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1575 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1576 else if ( tc[j].i != tc[j].j
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1577 && ( constraints.constraints[ tc[j].i ].first +
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1578 constraints.constraints[ tc[j].i ].last == 2 * alltranscripts[i].first
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1579 || constraints.constraints[ tc[j].i ].first +
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1580 constraints.constraints[ tc[j].i ].last == 2 * alltranscripts[i].last ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1581 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1582 adjustAbundance = constraints.constraints[ tc[j].j ].normAbund ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1583 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1584 else if ( tc[j].i != tc[j].j
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1585 && ( constraints.constraints[ tc[j].j ].first +
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1586 constraints.constraints[ tc[j].j ].last == 2 * alltranscripts[i].first
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1587 || constraints.constraints[ tc[j].j ].first +
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1588 constraints.constraints[ tc[j].j ].last == 2 * alltranscripts[i].last ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1589 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1590 adjustAbundance = constraints.constraints[ tc[j].i ].normAbund ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1591 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1592 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1593 if ( adjustAbundance < value )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1594 /*!( seIdxCnt > 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1595 && ( ( ( constraints.constraints[ tc[j].i ].first >= firstRegion.a && constraints.constraints[ tc[j].i ].last <= firstRegion.b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1596 && ( constraints.constraints[ tc[j].j ].first >= firstRegion.a && constraints.constraints[ tc[j].j ].last <= firstRegion.b ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1597 || ( ( constraints.constraints[ tc[j].i ].first >= lastRegion.a && constraints.constraints[ tc[j].i ].last <= lastRegion.b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1598 && ( constraints.constraints[ tc[j].j ].first >= lastRegion.a && constraints.constraints[ tc[j].j ].last <= lastRegion.b ) ) ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1599 )*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1600 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1601 // Not use the constraints totally within the 3'/5'-end in the transcript
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1602 value = adjustAbundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1603 tag = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1604 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1605 avgTranscriptAbundance[i] += tc[j].abundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1606
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1607 if ( !usedConstraints[ tc[j].i ] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1608 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1609 struct _constraint &c = constraints.constraints[ tc[j].i ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1610 for ( k = 0 ; k < seIdxCnt - 1 ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1611 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1612 // Note that since the constraint is already compatible with the txpt,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1613 // chain[k].a/b must be also adjacent in this constraint.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1614 if ( c.vector.Test( chain[k].a ) && c.vector.Test( chain[k].b ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1615 covered[k] = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1616 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1617 usedConstraints[ tc[j].i ] = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1618 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1619
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1620 if ( !usedConstraints[ tc[j].j ] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1621 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1622 struct _constraint &c = constraints.constraints[ tc[j].j ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1623 for ( k = 0 ; k < seIdxCnt - 1 ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1624 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1625 if ( c.vector.Test( chain[k].a ) && c.vector.Test( chain[k].b ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1626 covered[k] = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1627 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1628 usedConstraints[ tc[j].j ] = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1629 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1630 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1631 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1632
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1633 // Apply a penalty if subexons that should appear together do not all show up together
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1634 int size = togetherChain.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1635 if ( size > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1636 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1637 BitTable bufferTable( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1638 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1639 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1640 bufferTable.Assign( togetherChain[j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1641 bufferTable.And( alltranscripts[i].seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1642 //if ( !bufferTable.IsAllZero() && !bufferTable.IsEqual( togetherChain[j] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1643 // value /= 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1644
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1645 if ( !bufferTable.IsAllZero() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1646 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1647 if ( bufferTable.IsEqual( togetherChain[j] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1648 //printf( "nice together!\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1649 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1650 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1651 value /= 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1652 //printf( "bad together!\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1653 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1654 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1655 bufferTable.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1656 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1657
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1658
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1659 // Every two-subexon chain should be covered by some reads if a transcript is expressed highly enough
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1660 int cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1661 for ( j = 0 ; j < seIdxCnt - 1 ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1662 if ( covered[j] == false ) // && j >= firstRegion.b && j <= lastRegion.a - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1663 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1664 value = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1665 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1666 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1667 ++cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1668 if ( seIdxCnt > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1669 coveredPortion[i] = (double)cnt / (double)( seIdxCnt - 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1670 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1671 coveredPortion[i] = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1672 if ( coveredPortion[i] == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1673 coveredPortion[i] = (double)0.5 / ( seIdxCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1674
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1675 // For short subexons (readLength-subexon_length-1>30), we further require a constraint to cover three consecutive subexons
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1676 /*memset( usedConstraints, false, sizeof( bool ) * constraints.constraints.size() ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1677 for ( j = 1 ; j < seIdxCnt - 1 ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1678 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1679 int k = subexonIdx[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1680 if ( alignments.readLen - ( subexons[k].end - subexons[k].start + 1 ) - 1 <= 30 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1681 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1682 // We need at least one of the side subexons are adjacent to the center one.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1683 if ( subexons[ subexonIdx[j - 1] ].end + 1 < subexons[k].start && subexons[k].end + 1 < subexons[ subexonIdx[j + 1] ].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1684 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1685
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1686 int l = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1687 for ( l = 0 ; l < tcCnt ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1688 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1689 if ( btable[i].Test(l) && tc[l].abundance > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1690 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1691 if ( !usedConstraints[ tc[l].i ] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1692 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1693 struct _constraint &c = constraints.constraints[ tc[l].i ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1694 if ( c.vector.Test( subexonIdx[j - 1] ) && c.vector.Test( subexonIdx[j] ) &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1695 c.vector.Test( subexonIdx[j + 1] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1696 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1697 usedConstraints[ tc[l].i ] = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1698 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1699
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1700 if ( !usedConstraints[ tc[l].j ] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1701 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1702 struct _constraint &c = constraints.constraints[ tc[l].j ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1703 if ( c.vector.Test( subexonIdx[j - 1] ) && c.vector.Test( subexonIdx[j] ) &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1704 c.vector.Test( subexonIdx[j + 1] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1705 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1706 usedConstraints[ tc[l].j ] = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1707 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1708 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1709 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1710 // It is not covered
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1711 if ( l >= tcCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1712 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1713 int residual = alignments.readLen - ( subexons[k].end - subexons[k].start + 1 ) - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1714 //printf( "residual: %d %d %lf\n", k, residual, value ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1715 if ( value * residual > 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1716 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1717 value = 1 / (double)residual ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1718 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1719 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1720 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1721
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1722 if ( tag == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1723 value = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1724 if ( value > maxAbundance )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1725 maxAbundance = value ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1726 transcriptAbundance[i] = value ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1727 if ( tag != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1728 avgTranscriptAbundance[i] /= compatibleCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1729
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1730 //printf( "abundance %d: %lf %lf ", i, value, avgTranscriptAbundance[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1731 //alltranscripts[i].seVector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1732 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1733 if ( maxAbundance == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1734 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1735 for ( i = 0 ; i < atcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1736 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1737 transcriptAbundance[i] = coveredPortion[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1738 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1739 maxAbundance = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1740 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1741 //printf( "%s: %lf\n", __func__, maxAbundance ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1742 int size = togetherChain.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1743 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1744 togetherChain[j].Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1745 delete[] usedConstraints ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1746 delete[] covered ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1747 delete[] chain ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1748 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1749 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1750 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1751 for ( i = 0 ; i < atcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1752 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1753 transcriptAbundance[i] = alltranscripts[i].abundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1754 if ( transcriptAbundance[i] > maxAbundance )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1755 maxAbundance = transcriptAbundance[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1756 coveredPortion[i] = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1757 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1758 if ( maxAbundance == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1759 maxAbundance = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1760 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1761
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1762 // Obtain the prefix, suffix information of the transcripts.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1763 int *nextSuffix, *nextPrefix ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1764 struct _pair32 *txptRank ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1765 nextSuffix = new int[atcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1766 nextPrefix = new int[atcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1767 txptRank = new struct _pair32[atcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1768 memset( nextSuffix, -1, sizeof( int ) * atcnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1769 memset( nextPrefix, -1, sizeof( int ) * atcnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1770 /*for ( i = 0 ; i < atcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1771 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1772 std::vector<int> subexonIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1773 txptRank[i].a = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1774 alltranscripts[i].seVector.GetOnesIndices( subexonIdx ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1775 txptRank[i].b = subexonIdx.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1776 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1777 qsort( txptRank, atcnt, sizeof( struct _pair32 ), CompPairsByB) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1778 BitTable bufferTable( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1779 for ( i = atcnt - 1 ; i >= 0 ; --i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1780 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1781 int a = txptRank[i].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1782 for ( j = i - 1 ; j >= 0 ; --j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1783 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1784 if ( txptRank[i].b == txptRank[j].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1785 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1786
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1787 int b = txptRank[j].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1788
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1789 if ( alltranscripts[b].last != alltranscripts[a].last )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1790 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1791
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1792 bufferTable.Assign( alltranscripts[a].seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1793 bufferTable.MaskRegionOutside( alltranscripts[b].first, alltranscripts[b].last ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1794 if ( bufferTable.IsEqual( alltranscripts[b].seVector ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1795 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1796 nextSuffix[a] = b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1797 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1798 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1799 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1800 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1801 for ( i = atcnt - 1 ; i >= 0 ; --i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1802 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1803 int a = txptRank[i].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1804 for ( j = i - 1 ; j >= 0 ; --j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1805 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1806 if ( txptRank[i].b == txptRank[j].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1807 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1808
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1809 int b = txptRank[j].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1810
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1811 if ( alltranscripts[b].first != alltranscripts[a].first )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1812 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1813
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1814 bufferTable.Assign( alltranscripts[a].seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1815 bufferTable.MaskRegionOutside( alltranscripts[b].first, alltranscripts[b].last ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1816 if ( bufferTable.IsEqual( alltranscripts[b].seVector ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1817 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1818 nextPrefix[a] = b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1819 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1820 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1821 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1822 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1823
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1824 bufferTable.Release() ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1825 delete[] txptRank ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1826
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1827 // Quantitative Set-Cover
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1828 int iterCnt = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1829 double *coverCnt = new double[atcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1830 for ( i = 0 ; i < atcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1831 coverCnt[i] = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1832 int *list = new int[atcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1833 int listCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1834
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1835 while ( 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1836 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1837 double max = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1838 int maxtag = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1839 double maxcnt = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1840 ++iterCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1841
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1842 // Find the optimal candidate.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1843 for ( i = 0 ; i < atcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1844 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1845 double value = inf ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1846 double cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1847
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1848 if ( coverCnt[i] == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1849 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1850 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1851 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1852 if ( tc[j].abundance > 0 && btable[i].Test( j ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1853 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1854 cnt += tc[j].effectiveCount ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1855 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1856 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1857 /*else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1858 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1859 std::vector<int> tcIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1860 btable[i].GetOnesIndices( tcIdx ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1861 int size = tcIdx.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1862 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1863 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1864 if ( tc[ tcIdx[j] ].abundance > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1865 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1866 cnt += tc[ tcIdx[j] ].effectiveCount ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1867 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1868 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1869 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1870 coverCnt[i] = cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1871 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1872 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1873 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1874 cnt = coverCnt[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1875 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1876
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1877 value = transcriptAbundance[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1878 if ( cnt < 1 ) // This transcript does not satisfy any undepleted constraints.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1879 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1880 cnt *= coveredPortion[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1881
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1882 double weight = 1 ; //* seCnt / transcriptSeCnt[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1883 //if ( maxAbundance >= 1 && value / maxAbundance >= 0.2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1884 // seCntAdjust = sqrt( (double)( transcriptSeCnt[i] ) / seCnt ) ;//< 0.5 ? 0.5 : (double)( transcriptSeCnt[i] ) / seCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1885
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1886 if ( alltranscripts[i].FPKM > 0 && sampleCnt > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1887 weight = ( 1 + alltranscripts[i].FPKM / sampleCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1888
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1889 double score = ComputeScore( cnt, weight, value, maxAbundance, alltranscripts[i].correlationScore ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1890 if ( cnt > maxcnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1891 maxcnt = cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1892 score += adjustScore[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1893 if ( score > max )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1894 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1895 max = score ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1896 maxtag = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1897 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1898 else if ( score == max )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1899 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1900 if ( avgTranscriptAbundance[maxtag] < avgTranscriptAbundance[i] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1901 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1902 max = score ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1903 maxtag = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1904 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1905 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1906 //printf( "score: %d %lf -> %lf\n", i, cnt, score ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1907 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1908
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1909 if ( maxcnt == 0 || maxtag == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1910 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1911
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1912 // Find the constraint that should be depleted.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1913 double update = inf ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1914 int updateTag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1915 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1916 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1917 if ( btable[ maxtag ].Test( j ) && tc[j].abundance > 0 &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1918 tc[j].abundance <= update )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1919 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1920 update = tc[j].abundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1921 updateTag = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1922 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1923 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1924
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1925 // Search suffix and prefix to see whether these fit better.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1926 int p = nextSuffix[ maxtag] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1927 while ( p != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1928 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1929 if ( transcriptAbundance[p] >= 10.0 * transcriptAbundance[maxtag]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1930 && btable[p].Test( updateTag ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1931 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1932 //printf( "%d\n", p ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1933 maxtag = p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1934 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1935 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1936 p = nextSuffix[p] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1937 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1938 p = nextPrefix[maxtag] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1939 while ( p != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1940 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1941 if ( transcriptAbundance[p] >= 10.0 * transcriptAbundance[maxtag]
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1942 && btable[p].Test( updateTag ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1943 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1944 maxtag = p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1945 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1946 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1947 p = nextPrefix[p] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1948 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1949
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1950
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1951 // Update the abundance.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1952 int supportCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1953 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1954 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1955 if ( btable[maxtag].Test( j ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1956 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1957 if ( tc[j].abundance > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1958 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1959 tc[j].abundance -= 1 * update ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1960 double factor = tc[j].effectiveCount ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1961 double tmp = ( tc[j].support * update / tc[j].normAbund * factor ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1962 alltranscripts[maxtag].constraintsSupport[j] += tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1963 alltranscripts[maxtag].abundance += tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1964
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1965 if ( tc[j].abundance <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1966 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1967 int l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1968 for ( l = 0 ; l < atcnt ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1969 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1970 if ( btable[l].Test(j) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1971 coverCnt[l] -= tc[j].effectiveCount ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1972 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1973 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1974 ++supportCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1975 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1976 else if ( alltranscripts[maxtag].constraintsSupport[j] == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1977 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1978 double sum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1979 double takeOut = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1980 double factor = tc[j].effectiveCount ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1981 listCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1982 for ( i = 0 ; i < atcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1983 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1984 if ( i == maxtag )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1985 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1986
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1987 if ( alltranscripts[i].abundance > 0 && btable[i].Test(j) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1988 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1989 sum += alltranscripts[i].constraintsSupport[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1990
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1991 double tmp = ( alltranscripts[i].constraintsSupport[j] + alltranscripts[maxtag].constraintsSupport[j] ) *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1992 transcriptAbundance[maxtag] / ( transcriptAbundance[maxtag] + transcriptAbundance[i] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1993 - alltranscripts[maxtag].constraintsSupport[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1994 if ( tmp > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1995 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1996 list[ listCnt ] = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1997 ++listCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1998 takeOut += tmp ; //alltranscripts[i].constraintsSupport[j] * transcriptAbundance[maxtag] / ( transcriptAbundance[maxtag] + transcriptAbundance[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1999 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2000 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2001 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2002
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2003 double ratio = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2004 double takeOutFactor = 0.5 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2005 if ( update < tc[j].normAbund )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2006 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2007 if ( takeOut > ( tc[j].support * update / tc[j].normAbund * factor ) * takeOutFactor )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2008 ratio = ( tc[j].support * update / tc[j].normAbund * factor ) * takeOutFactor / takeOut ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2009 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2010 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2011 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2012 if ( takeOut > ( tc[j].support * factor ) * takeOutFactor )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2013 ratio = ( tc[j].support * factor ) * takeOutFactor / takeOut ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2014 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2015
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2016 if ( 1 ) //update < tc[j].normAbund )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2017 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2018 for ( i = 0 ; i < listCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2019 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2020 //double tmp = ( tc[j].support * update / tc[j].normAbund * factor ) *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2021 // ( alltranscripts[ list[i] ].constraintsSupport[j] / sum ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2022 //if ( alltranscripts[ list[i] ].constraintsSupport[j] < tmp )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2023 // printf( "WARNING! %lf %lf, %lf\n", alltranscripts[ list[i] ].constraintsSupport[j], sum, tmp ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2024
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2025 //double tmp = alltranscripts[ list[i] ].constraintsSupport[j] * transcriptAbundance[maxtag] / ( transcriptAbundance[maxtag] + transcriptAbundance[ list[i] ] ) * ratio ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2026 double tmp = ( ( alltranscripts[ list[i] ].constraintsSupport[j] + alltranscripts[maxtag].constraintsSupport[j] ) *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2027 transcriptAbundance[maxtag] / ( transcriptAbundance[maxtag] + transcriptAbundance[ list[i] ] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2028 - alltranscripts[maxtag].constraintsSupport[j] ) * ratio ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2029
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2030
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2031 alltranscripts[ list[i] ].constraintsSupport[j] -= tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2032 alltranscripts[ list[i] ].abundance -= tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2033 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2034 //double tmp = ( tc[j].support * update / tc[j].normAbund * factor ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2035 //printf( "%lf %lf. %lf %lf\n", takeOut, ratio, update, tc[j].normAbund ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2036 double tmp = takeOut * ratio ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2037 alltranscripts[maxtag].constraintsSupport[j] += tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2038 alltranscripts[maxtag].abundance += tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2039 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2040 /*else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2041 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2042 double tmp = ( tc[j].support / (double)( listCnt + 1 ) ) * factor ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2043 for ( i = 0 ; i < listCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2044 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2045 alltranscripts[ list[i] ].abundance -= alltranscripts[ list[i] ].constraintsSupport[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2046
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2047 alltranscripts[ list[i] ].constraintsSupport[j] = tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2048 alltranscripts[ list[i] ].abundance += tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2049 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2050 alltranscripts[maxtag].constraintsSupport[j] += tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2051 alltranscripts[maxtag].abundance += tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2052 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2053
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2054 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2055 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2056
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2057 if ( tc[j].abundance < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2058 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2059 tc[j].abundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2060
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2061 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2062 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2063 tc[ updateTag ].abundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2064 if ( supportCnt == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2065 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2066 //adjustScore[maxtag] += 1 / (double)tcCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2067 //printf( "maxtag=%d %lf %d\n", maxtag, update, updateTag ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2068 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2069
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2070 for ( i = 0 ; i < atcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2071 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2072 if ( alltranscripts[i].abundance > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2073 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2074 struct _transcript nt = alltranscripts[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2075 nt.seVector.Nullify() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2076 nt.seVector.Duplicate( alltranscripts[i].seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2077 nt.constraintsSupport = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2078 if ( transcriptAbundance[i] == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2079 nt.correlationScore = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2080 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2081 nt.correlationScore = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2082 nt.id = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2083 transcripts.push_back( nt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2084 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2085 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2086
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2087 // Release the memory of btable.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2088 for ( i = 0 ; i < atcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2089 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2090 delete[] alltranscripts[i].constraintsSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2091 btable[i].Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2092 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2093 delete[] btable ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2094
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2095 delete[] list ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2096 delete[] transcriptSeCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2097 delete[] transcriptLength ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2098 delete[] transcriptAbundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2099 delete[] avgTranscriptAbundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2100 delete[] coveredPortion ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2101 delete[] adjustScore ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2102 delete[] coverCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2103
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2104 delete[] nextPrefix ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2105 delete[] nextSuffix ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2106
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2107 // Redistribute weight if there is some constraints that are unbalanced.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2108 /*tcnt = transcripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2109 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2110 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2111 int maxRatio = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2112 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2113 if ( transcripts[i].constraintsSupport[j] > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2114 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2115 double factor = tc[j].effectiveCount ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2116 if ( transcripts[])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2117 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2118 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2119 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2120
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2121 void TranscriptDecider::AbundanceEstimation( struct _subexon *subexons, int seCnt, Constraints &constraints, std::vector<struct _transcript> &transcripts )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2122 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2123 int tcnt = transcripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2124 int size ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2125 int i, j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2126 if ( tcnt <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2127 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2128
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2129 std::vector<struct _matePairConstraint> &tc = constraints.matePairs ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2130 int tcCnt = tc.size() ; // transcript constraints
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2131
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2132 BitTable *btable = new BitTable[ tcnt ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2133 int *transcriptLength = new int[tcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2134 int *compatibleList = new int[tcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2135 double *rho = new double[tcnt] ; // the abundance.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2136 int iterCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2137
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2138 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2139 transcripts[i].constraintsSupport = new double[ tcCnt ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2140
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2141 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2142 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2143 btable[i].Init( tcCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2144 double min = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2145 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2146 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2147 int a = tc[j].i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2148 int b = tc[j].j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2149
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2150 if ( IsConstraintInTranscript( transcripts[i], constraints.constraints[a] ) == 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2151 && IsConstraintInTranscript( transcripts[i], constraints.constraints[b] ) == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2152 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2153 //printf( "set btble[ %d ].Set( %d ): %d %d\n", i, j, a, b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2154 btable[i].Set( j ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2155
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2156 if ( min == -1 || tc[j].normAbund < min )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2157 min = tc[j].normAbund ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2158 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2159 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2160
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2161 std::vector<int> subexonIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2162 transcripts[i].seVector.GetOnesIndices( subexonIdx ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2163 int subexonIdxCnt = subexonIdx.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2164 int len = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2165 for ( j = 0 ; j < subexonIdxCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2166 len += subexons[ subexonIdx[j] ].end - subexons[ subexonIdx[j] ].start + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2167 transcriptLength[i] = len - alignments.fragLen + 2 * alignments.fragStdev ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2168 if ( transcriptLength[i] < 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2169 transcriptLength[i] = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2170 rho[i] = transcripts[i].abundance / transcriptLength[i] ; // use the rough estimation generated before.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2171 if ( transcripts[i].correlationScore == -1 && rho[i] > 0.1 / (double)alignments.readLen )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2172 rho[i] = 0.1 / (double)alignments.readLen ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2173 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2174
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2175 while ( 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2176 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2177 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2178 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2179 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2180 transcripts[i].constraintsSupport[j] = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2181 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2182 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2183 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2184 int clCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2185 double sum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2186 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2187 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2188 if ( btable[i].Test(j) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2189 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2190 compatibleList[ clCnt ] = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2191 ++clCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2192 sum += rho[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2193 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2194 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2195
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2196 for ( i = 0 ; i < clCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2197 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2198 double factor = tc[j].effectiveCount ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2199 transcripts[ compatibleList[i] ].constraintsSupport[j] = ( rho[ compatibleList[i] ] / sum ) * tc[j].support * factor ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2200 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2201 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2202
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2203 double diff = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2204 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2205 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2206 double newAbund = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2207 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2208 newAbund += transcripts[i].constraintsSupport[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2209 double old = rho[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2210 rho[i] = newAbund / transcriptLength[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2211 //printf( "rho[%d]=%lf\n", i, rho[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2212 if ( transcripts[i].correlationScore == -1 && rho[i] > 0.1 / (double)alignments.readLen )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2213 rho[i] = 0.1 / (double)alignments.readLen ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2214
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2215 double tmp = ( old - rho[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2216 diff += tmp < 0 ? -tmp : tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2217 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2218 //printf( "%lf\n", diff ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2219 if ( diff < 1e-3)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2220 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2221
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2222 ++iterCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2223 if ( iterCnt >= 1000 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2224 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2225 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2226
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2227 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2228 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2229 //printf( "%lf=>", transcripts[i].abundance ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2230 transcripts[i].abundance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2231 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2232 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2233 transcripts[i].abundance += transcripts[i].constraintsSupport[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2234 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2235 //printf( "%lf. (%lf)\n", transcripts[i].abundance, transcripts[i].correlationScore ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2236 //transcripts[i].seVector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2237 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2238
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2239 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2240 delete[] transcripts[i].constraintsSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2241
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2242 // Release the memory of btable.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2243 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2244 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2245 btable[i].Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2246 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2247 delete[] compatibleList ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2248 delete[] btable ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2249 delete[] transcriptLength ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2250 delete[] rho ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2251 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2252
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2253 int TranscriptDecider::RefineTranscripts( struct _subexon *subexons, int seCnt, bool aggressive,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2254 std::map<int, int> *subexonChainSupport, int *txptSampleSupport, std::vector<struct _transcript> &transcripts, Constraints &constraints )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2255 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2256 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2257 int tcnt = transcripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2258 if ( tcnt == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2259 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2260 int tcCnt = constraints.matePairs.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2261
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2262 std::vector<struct _matePairConstraint> &tc = constraints.matePairs ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2263 std::vector<struct _constraint> &scc = constraints.constraints ; //single-end constraints.constraints
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2264
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2265 // Remove transcripts whose FPKM are too small.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2266 //printf( "%d %d\n", usedGeneId, baseGeneId ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2267 double *geneMaxFPKM = new double[usedGeneId - baseGeneId ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2268 int *geneMaxFPKMTag = new int[usedGeneId - baseGeneId ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2269 double *nonOverlapMaxFPKM = new double[ usedGeneId - baseGeneId ] ; // the max FPKM among all the transcripts not overlapping with maxFPKMTag transcripts.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2270 memset( geneMaxFPKM, 0, sizeof( double ) * ( usedGeneId - baseGeneId ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2271 memset( geneMaxFPKMTag, 0, sizeof( int ) * ( usedGeneId - baseGeneId ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2272 memset( nonOverlapMaxFPKM, 0, sizeof( double ) * ( usedGeneId - baseGeneId ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2273
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2274 double *geneMaxCov = new double[ usedGeneId - baseGeneId ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2275 memset( geneMaxCov, 0, sizeof( double ) * ( usedGeneId - baseGeneId ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2276 int *txptGid = new int[tcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2277
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2278 /*for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2279 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2280 printf( "%d: %lf ", i, transcripts[i].FPKM ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2281 transcripts[i].seVector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2282 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2283
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2284 /*==================================================================
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2285 Remove transcripts whose relative FPKM is too low. (-f)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2286 ====================================================================*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2287 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2288 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2289 int gid = GetTranscriptGeneId( transcripts[i], subexons ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2290 int len = GetTranscriptLengthFromAbundanceAndFPKM( transcripts[i].abundance, transcripts[i].FPKM ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2291 //printf( "gid=%d\n", gid ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2292 //printf( "%lf %lf %d\n", transcripts[i].abundance, transcripts[i].FPKM, len ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2293 if ( transcripts[i].FPKM > geneMaxFPKM[gid - baseGeneId ] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2294 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2295 geneMaxFPKM[ gid - baseGeneId ] = transcripts[i].FPKM ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2296 geneMaxFPKMTag[ gid - baseGeneId ] = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2297 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2298 if ( transcripts[i].abundance * alignments.readLen / len > geneMaxCov[gid - baseGeneId ] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2299 geneMaxCov[gid - baseGeneId] = ( transcripts[i].abundance * alignments.readLen ) / len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2300 txptGid[i] = gid ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2301 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2302
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2303 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2304 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2305 int tag = txptGid[i] - baseGeneId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2306 if ( ( transcripts[i].last < transcripts[ geneMaxFPKMTag[ tag ] ].first
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2307 || transcripts[i].first > transcripts[ geneMaxFPKMTag[tag] ].last ) && transcripts[i].FPKM > nonOverlapMaxFPKM[tag] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2308 nonOverlapMaxFPKM[tag] = transcripts[i].FPKM ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2309 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2310 BitTable bufferTable ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2311 bufferTable.Duplicate( transcripts[0].seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2312
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2313 if ( !aggressive )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2314 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2315 // Rescue the transcripts covering unique constraints.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2316 int cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2317 int tag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2318 int *uniqCount = new int[tcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2319 memset( uniqCount, 0, sizeof( int ) * tcnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2320 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2321 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2322 cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2323 if ( tc[j].uniqSupport <= 5 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2324 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2325 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2326 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2327 if ( IsConstraintInTranscript( transcripts[i], scc[ tc[j].i ] ) &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2328 IsConstraintInTranscript( transcripts[i], scc[ tc[j].j] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2329 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2330 tag = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2331 ++cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2332 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2333 if ( cnt >= 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2334 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2335 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2336 if ( cnt == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2337 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2338 ++uniqCount[tag] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2339 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2340 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2341 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2342 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2343 if ( uniqCount[i] >= 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2344 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2345 transcripts[i].abundance *= 4 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2346 transcripts[i].FPKM *= 4 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2347 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2348 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2349
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2350 delete[] uniqCount ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2351 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2352
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2353 int sccCnt = scc.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2354 double filterFactor = 1.0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2355
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2356 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2357 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2358 //printf( "%d: %lf %lf\n", txptGid[i], transcripts[i].abundance, geneMaxFPKM[ txptGid[i] - baseGeneId ] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2359
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2360 if ( transcripts[i].FPKM < filterFactor * FPKMFraction * geneMaxFPKM[ txptGid[i] - baseGeneId ] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2361 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2362 /*int cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2363 int coverCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2364 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2365 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2366 if ( transcripts[i].constraintsSupport[j] > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2367 ++coverCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2368 double factor = tc[j].effectiveCount ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2369 if ( transcripts[i].constraintsSupport[j] >= factor * tc[j].support - 1e-3
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2370 && tc[j].support >= 10
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2371 && tc[j].uniqSupport >= 0.95 * tc[j].support )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2372 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2373 ++cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2374 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2375 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2376 //cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2377 if ( cnt >= 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2378 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2379 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2380 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2381 else*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2382 transcripts[i].abundance = -transcripts[i].abundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2383 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2384 //if ( transcripts[i].FPKM >= 0.8 * geneMaxFPKM[ txptGid[i] - baseGeneId ] && geneMaxCov[ txptGid[i] - baseGeneId ] >= txptMinReadDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2385 // continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2386 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2387
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2388 if ( nonOverlapMaxFPKM != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2389 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2390 // Rescue in two iterations; the first iteration only marks the candidates.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2391 std::vector<int> rescueList ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2392 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2393 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2394 if ( transcripts[i].abundance >= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2395 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2396
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2397 for ( j = 0 ; j < tcnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2398 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2399 if ( transcripts[j].abundance < 0 || txptGid[i] != txptGid[j] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2400 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2401 if ( transcripts[i].first <= transcripts[j].last && transcripts[i].last >= transcripts[j].first )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2402 /*bufferTable.Assign( transcripts[i].seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2403 bufferTable.And( transcripts[j].seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2404
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2405 if ( !bufferTable.IsAllZero() )*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2406 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2407 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2408 if ( j >= tcnt && transcripts[i].FPKM >= FPKMFraction * nonOverlapMaxFPKM[ txptGid[i] - baseGeneId ] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2409 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2410 //transcripts[i].abundance = -transcripts[i].abundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2411 rescueList.push_back( i ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2412 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2413 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2414
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2415 int size = rescueList.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2416 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2417 transcripts[ rescueList[i] ].abundance *= -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2418 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2419
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2420 /*==================================================================
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2421 Remove transcripts whose read coverage is too low. (-d)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2422 ====================================================================*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2423 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2424 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2425 if ( transcripts[i].abundance >= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2426 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2427 int len = GetTranscriptLengthFromAbundanceAndFPKM( transcripts[i].abundance, transcripts[i].FPKM ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2428 double cov = ( transcripts[i].abundance * alignments.readLen ) / len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2429 //printf( "%d: %d %d %lf %lf\n", i, len, transcripts[i].seVector.Count(), cov, geneMaxCov[ txptGid[i] - baseGeneId ] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2430
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2431 if ( ( tcnt > 1 || len <= 1000 || transcripts[i].seVector.Count() <= 3 ) && cov < txptMinReadDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2432 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2433 //if ( usedGeneId == baseGeneId + 1 && /*transcripts[i].seVector.Count() > 3
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2434 // && len > 1000 &&*/ geneMaxCov[ txptGid[i] - baseGeneId ] == cov )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2435 if ( geneMaxCov[ txptGid[i] - baseGeneId ] == cov )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2436 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2437
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2438 // Test whether it has some very abundant constraints.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2439 /*int cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2440 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2441 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2442 if ( transcripts[i].constraintsSupport[j] >= tc[j].support / 2.0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2443 && tc[j].support >= 10
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2444 && tc[j].uniqSupport >= 0.95 * tc[j].support
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2445 && tc[j].normAbund >= 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2446 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2447 ++cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2448 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2449 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2450
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2451 if ( cnt >= 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2452 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2453 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2454 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2455
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2456 // Test whether this transcript is fully covered. If so, we can filter it.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2457
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2458 if ( geneMaxCov[ txptGid[i] - baseGeneId ] <= 5 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2459 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2460 bufferTable.Reset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2461 for ( j = 0 ; j < sccCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2462 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2463 if ( !IsConstraintInTranscript( transcripts[i], scc[j] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2464 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2465 bufferTable.Or( scc[j].vector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2466 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2467 if ( bufferTable.IsEqual( transcripts[i].seVector ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2468 transcripts[i].abundance = -transcripts[i].abundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2469 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2470 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2471 transcripts[i].abundance = -transcripts[i].abundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2472
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2473 /*else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2474 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2475 transcripts[i].seVector.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2476 bufferTable.Print() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2477 OutputTranscript( stderr, subexons, transcripts[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2478 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2479 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2480 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2481 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2482
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2483 /*==================================================================
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2484 Remove transcripts that are too short.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2485 ====================================================================*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2486 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2487 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2488 if ( transcripts[i].abundance <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2489 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2490
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2491 int len = GetTranscriptLengthFromAbundanceAndFPKM( transcripts[i].abundance, transcripts[i].FPKM ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2492 if ( len < 200 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2493 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2494 transcripts[i].abundance = -transcripts[i].abundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2495 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2496 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2497
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2498 // Rescue transcripts that showed up in many samples.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2499 /*for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2500 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2501 if ( transcripts[i].abundance > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2502 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2503 if ( txptSampleSupport[ transcripts[i].id ] >= 3 &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2504 txptSampleSupport[transcripts[i].id ] >= (int)( sampleCnt / 2 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2505 transcripts[i].abundance = -transcripts[i].abundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2506 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2507
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2508 // Rescue some transcripts covering subexon chains that showed up in many samples but are missing after filtration.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2509 struct _constraint tmpC ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2510 tmpC.vector.Init( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2511
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2512 std::vector< struct _pair32 > missingChain ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2513 std::vector<int> recoverCandidate ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2514 bool *used = new bool[tcnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2515 memset( used, false, sizeof( bool ) * tcnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2516
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2517 // Obtain the list of transcripts that should be recovered.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2518 for ( i = 0 ; i < seCnt && sampleCnt > 1 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2519 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2520
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2521 double maxFPKM = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2522 for ( std::map<int, int>::iterator it = subexonChainSupport[i].begin() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2523 it != subexonChainSupport[i].end() ; ++it )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2524 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2525 if ( sampleCnt >= 0 && ( it->second < 3 || it->second < (int)( 0.5 * sampleCnt ) ) && it->second <= sampleCnt / 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2526 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2527
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2528 bool recover = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2529 tmpC.vector.Reset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2530 tmpC.vector.Set( i ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2531 tmpC.vector.Set( it->first ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2532 tmpC.first = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2533 tmpC.last = it->first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2534
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2535
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2536 for ( j = 0 ; j < tcnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2537 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2538 if ( transcripts[j].abundance < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2539 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2540
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2541 if ( IsConstraintInTranscript( transcripts[j], tmpC ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2542 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2543 recover = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2544 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2545 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2546
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2547 if ( recover )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2548 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2549 for ( j = 0 ; j < tcnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2550 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2551 if ( transcripts[j].abundance > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2552 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2553 //printf( "%d %lf\n", IsConstraintInTranscript( transcripts[j], tmpC ), transcripts[j].FPKM ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2554 if ( IsConstraintInTranscript( transcripts[j], tmpC ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2555 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2556 /*if ( maxTag == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2557 maxTag = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2558 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2559 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2560 if ( txptSampleSupport[ transcripts[j].id ] > txptSampleSupport[ transcripts[maxTag ].id ] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2561 maxTag = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2562 else if ( txptSampleSupport[ transcripts[j].id ] == txptSampleSupport[ transcripts[maxTag ].id ])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2563 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2564 if ( transcripts[j].FPKM > transcripts[maxTag].FPKM )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2565 maxTag = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2566 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2567 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2568
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2569 struct _pair32 np ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2570 np.a = i ; np.b = it->first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2571 missingChain.push_back( np ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2572
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2573 if ( !used[j] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2574 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2575 recoverCandidate.push_back( j ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2576 used[j] = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2577 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2578 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2579 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2580
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2581 /*if ( maxTag != -1 && txptSampleSupport[ transcripts[maxTag].id ] > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2582 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2583 //printf( "recover %d %d\n", maxTag, txptSampleSupport[ transcripts[maxTag].id ] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2584 transcripts[maxTag].abundance *= -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2585 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2586 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2587 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2588
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2589 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2590 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2591
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2592 int size = recoverCandidate.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2593 memset( used, false, sizeof( bool ) * tcnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2594 // Recover the candidates in the order of reliability
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2595 int *geneRecoverCnt = new int[ usedGeneId - baseGeneId ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2596 memset( geneRecoverCnt, 0, sizeof( int ) * ( usedGeneId - baseGeneId ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2597 int round = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2598 if ( aggressive && size > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2599 round = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2600
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2601 for ( i = 0 ; i < size ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2602 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2603 int maxTag = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2604 int maxCover = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2605 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2606 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2607 if ( !used[ recoverCandidate[j] ] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2608 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2609 /*int cover = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2610
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2611 k = missingChain.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2612 int l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2613 for ( l = 0 ; l < k ; ++l )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2614 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2615 if ( missingChain[l].a == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2616 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2617
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2618 tmpC.vector.Reset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2619 tmpC.vector.Set( missingChain[l].a ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2620 tmpC.vector.Set( missingChain[l].b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2621 tmpC.first = missingChain[l].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2622 tmpC.last = missingChain[l].b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2623
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2624 if ( IsConstraintInTranscript( transcripts[ recoverCandidate[j] ], tmpC ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2625 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2626 ++cover ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2627 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2628 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2629
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2630 if ( maxTag == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2631 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2632 maxTag = recoverCandidate[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2633 //maxCover = cover ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2634 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2635 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2636
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2637 /*if ( cover > maxCover )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2638 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2639 maxTag = recoverCandidate[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2640 maxCover = cover ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2641 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2642 else if ( cover == maxCover )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2643 {*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2644 if ( txptSampleSupport[ transcripts[ recoverCandidate[j] ].id ] >
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2645 txptSampleSupport[
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2646 transcripts[ maxTag ].id
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2647 ] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2648 maxTag = recoverCandidate[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2649 else if ( txptSampleSupport[ transcripts[ recoverCandidate[j] ].id ] ==
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2650 txptSampleSupport[ transcripts[ maxTag ].id ] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2651 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2652 if ( transcripts[ recoverCandidate[j] ].FPKM > transcripts[ maxTag ].FPKM )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2653 maxTag = recoverCandidate[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2654 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2655
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2656 /*else if ( transcripts[ recoverCandidate[j] ].FPKM > transcripts[ maxTag ].FPKM )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2657 maxTag = recoverCandidate[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2658 else if ( transcripts[ recoverCandidate[j] ].FPKM == transcripts[ maxTag ].FPKM )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2659 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2660 if ( txptSampleSupport[ transcripts[ recoverCandidate[j] ].id ] >
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2661 txptSampleSupport[ transcripts[ maxTag ].id ] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2662 maxTag = recoverCandidate[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2663 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2664 //}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2665 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2666 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2667
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2668 if ( maxTag == -1 || txptSampleSupport[ transcripts[ maxTag ].id ] <= 2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2669 || txptSampleSupport[ transcripts[maxTag].id ] < 0.5 * sampleCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2670 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2671
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2672 used[maxTag] = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2673 if ( geneRecoverCnt[ txptGid[maxTag] - baseGeneId ] >= round )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2674 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2675 ++geneRecoverCnt[ txptGid[maxTag] - baseGeneId ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2676
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2677 k = missingChain.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2678 int cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2679 for ( j = 0 ; j < k ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2680 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2681 if ( missingChain[j].a == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2682 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2683
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2684 tmpC.vector.Reset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2685 tmpC.vector.Set( missingChain[j].a ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2686 tmpC.vector.Set( missingChain[j].b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2687 tmpC.first = missingChain[j].a ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2688 tmpC.last = missingChain[j].b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2689
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2690 if ( IsConstraintInTranscript( transcripts[maxTag], tmpC ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2691 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2692 missingChain[j].a = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2693 ++cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2694 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2695 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2696
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2697 int len = GetTranscriptLengthFromAbundanceAndFPKM( transcripts[maxTag].abundance, transcripts[maxTag].FPKM ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2698 double cov = ( transcripts[maxTag].abundance * alignments.readLen ) / len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2699 if ( cnt >= 1 && cov > 1.0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2700 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2701 transcripts[maxTag].abundance *= -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2702 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2703 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2704 delete[] used ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2705 delete[] geneRecoverCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2706 tmpC.vector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2707
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2708
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2709 tcnt = RemoveNegativeAbundTranscripts( transcripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2710
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2711
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2712
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2713 delete []geneMaxCov ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2714 bufferTable.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2715 delete []geneMaxFPKM ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2716 delete []geneMaxFPKMTag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2717 delete []nonOverlapMaxFPKM ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2718 delete []txptGid ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2719
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2720 /*==================================================================
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2721 Remove transcripts that seems duplicated
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2722 ====================================================================*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2723 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2724 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2725 int support = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2726 int uniqSupport = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2727
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2728 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2729 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2730 if ( !IsConstraintInTranscript( transcripts[i], scc[ tc[j].i ] ) || !IsConstraintInTranscript( transcripts[i], scc[ tc[j].j ] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2731 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2732 //support += scc[ tc[j].i ].support + scc[ tc[j].j ].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2733 //uniqSupport += scc[ tc[j].i ].uniqSupport + scc[ tc[j].j ].uniqSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2734 support += tc[j].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2735 uniqSupport += tc[j].uniqSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2736
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2737 //printf( "constraint uniqness: %d: %d %d\n", i, tc[j].uniqSupport, tc[j].support ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2738 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2739 //printf( "%d: %d %d\n", i, uniqSupport, support ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2740 if ( (double)uniqSupport < 0.03 * support )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2741 transcripts[i].abundance = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2742 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2743 tcnt = RemoveNegativeAbundTranscripts( transcripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2744
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2745
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2746 /*==================================================================
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2747 Remove shadow transcripts, the abnormal 2-exon txpt whose intron is very close to the true one or one of the anchor exon is shorter than 25bp....
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2748 ====================================================================*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2749 int minusCnt = 0, plusCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2750 int mainStrand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2751 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2752 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2753 if ( subexons[i].rightStrand == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2754 ++plusCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2755 else if ( subexons[i].rightStrand == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2756 ++minusCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2757 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2758 if ( plusCnt > minusCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2759 mainStrand = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2760 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2761 mainStrand = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2762
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2763 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2764 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2765 std::vector<int> subexonIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2766 transcripts[i].seVector.GetOnesIndices( subexonIdx ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2767 int size = subexonIdx.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2768 int intronCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2769 int anchorIdx = 0 ; // the subexon adjacent to the only intron.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2770
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2771 for ( j = 0 ; j < size - 1 ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2772 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2773 if ( subexons[ subexonIdx[j] ].end + 1 < subexons[ subexonIdx[j + 1] ].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2774 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2775 ++intronCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2776 anchorIdx = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2777 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2778 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2779 if ( intronCnt != 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2780 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2781
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2782 int anchorExonLength[2] = {0, 0};
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2783 int tag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2784 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2785 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2786 anchorExonLength[tag] += subexons[ subexonIdx[j] ].end - subexons[ subexonIdx[j] ].start + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2787 if ( tag == 0 && subexons[ subexonIdx[j] ].end + 1 < subexons[ subexonIdx[j + 1] ].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2788 ++tag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2789 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2790
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2791 int flag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2792 if ( subexons[ subexonIdx[anchorIdx] ].rightStrand == mainStrand )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2793 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2794 j = subexonIdx[ anchorIdx ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2795 if ( subexons[j].end - subexons[j].start + 1 <= 20 ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2796 ( subexons[j+ 1].start == subexons[j].end + 1 && subexons[j + 1].end - subexons[j + 1].start + 1 <= 20
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2797 && subexons[j + 1].rightStrand == mainStrand ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2798 ++flag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2799 j = subexonIdx[ anchorIdx + 1 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2800 if ( subexons[j].end - subexons[j].start + 1 <= 20 ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2801 ( subexons[j].start == subexons[j - 1].end + 1 && subexons[j - 1].end - subexons[j - 1].start + 1 <= 20
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2802 && subexons[j - 1].leftStrand == mainStrand ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2803 ++flag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2804 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2805
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2806 if ( anchorExonLength[0] <= 25 || anchorExonLength[1] <= 25 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2807 flag = 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2808
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2809 // the alignment support the intron must be unique and has enough support.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2810 int support = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2811 int uniqSupport = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2812 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2813 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2814 if ( !IsConstraintInTranscript( transcripts[i], scc[ tc[j].i ] ) || !IsConstraintInTranscript( transcripts[i], scc[ tc[j].j ] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2815 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2816 if ( ( scc[ tc[j].i ].vector.Test( subexonIdx[ anchorIdx ] ) && scc[ tc[j].i ].vector.Test( subexonIdx[ anchorIdx + 1 ] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2817 || ( scc[ tc[j].j ].vector.Test( subexonIdx[ anchorIdx ] ) && scc[ tc[j].j ].vector.Test( subexonIdx[ anchorIdx + 1 ] ) ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2818 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2819 support += tc[j].support ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2820 uniqSupport += tc[j].uniqSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2821 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2822
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2823 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2824
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2825 if ( (double)uniqSupport < 0.3 * support || support < txptMinReadDepth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2826 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2827 flag = 2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2828 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2829
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2830 if ( flag == 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2831 transcripts[i].abundance = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2832
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2833 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2834 tcnt = RemoveNegativeAbundTranscripts( transcripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2835
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2836 return transcripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2837 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2838
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2839 void TranscriptDecider::ComputeTranscriptsScore( struct _subexon *subexons, int seCnt, std::map<int, int> *subexonChainSupport, std::vector<struct _transcript> &transcripts )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2840 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2841 int i, j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2842 int tcnt = transcripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2843 struct _constraint tmpC ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2844 tmpC.vector.Init( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2845
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2846 for ( i = 0 ; i < tcnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2847 transcripts[i].correlationScore = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2848
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2849 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2850 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2851 for ( std::map<int, int>::iterator it = subexonChainSupport[i].begin() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2852 it != subexonChainSupport[i].end() ; ++it )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2853 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2854 if ( sampleCnt >= 0 && ( it->second < 3 || it->second < (int)( 0.1 * sampleCnt ) ) && it->second <= sampleCnt / 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2855 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2856
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2857 tmpC.vector.Reset() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2858 tmpC.vector.Set( i ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2859 tmpC.vector.Set( it->first ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2860 tmpC.first = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2861 tmpC.last = it->first ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2862
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2863 for ( j = 0 ; j < tcnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2864 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2865 if ( IsConstraintInTranscript( transcripts[j], tmpC ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2866 ++transcripts[j].correlationScore ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2867 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2868 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2869 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2870
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2871 tmpC.vector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2872 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2873
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2874 int TranscriptDecider::Solve( struct _subexon *subexons, int seCnt, std::vector<Constraints> &constraints, SubexonCorrelation &subexonCorrelation )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2875 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2876 int i, j, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2877 int cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2878 int *f = new int[seCnt] ; // this is a general buffer for a type of usage.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2879 bool useDP = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2880
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2881 compatibleTestVectorT.Init( seCnt ) ; // this is the bittable used in compatible test function.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2882 compatibleTestVectorC.Init( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2883
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2884 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2885 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2886 subexons[i].canBeStart = subexons[i].canBeEnd = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2887
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2888 if ( subexons[i].prevCnt == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2889 subexons[i].canBeStart = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2890 else if ( subexons[i].leftClassifier < canBeSoftBoundaryThreshold && subexons[i].leftClassifier != -1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2891 && subexons[i].leftStrand != 0 ) // The case of overhang.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2892 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2893 // We then look into whether there is a left-side end already showed up before this subexon in this region of subexons.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2894 bool flag = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2895 for ( j = i - 1 ; j >= 0 ; --j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2896 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2897 if ( subexons[j].end + 1 != subexons[j + 1].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2898 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2899 if ( subexons[i].canBeStart == true )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2900 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2901 flag = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2902 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2903 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2904 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2905 subexons[i].canBeStart = flag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2906 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2907
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2908 if ( subexons[i].nextCnt == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2909 subexons[i].canBeEnd = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2910 else if ( subexons[i].rightClassifier < canBeSoftBoundaryThreshold && subexons[i].rightClassifier != -1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2911 && subexons[i].rightStrand != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2912 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2913 subexons[i].canBeEnd = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2914 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2915 // Remove other soft end already showed up in this region of subexons.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2916 if ( subexons[i].canBeEnd == true )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2917 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2918 for ( j = i - 1 ; j >= 0 ; --j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2919 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2920 if ( subexons[j].end + 1 != subexons[j + 1].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2921 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2922 if ( subexons[j].canBeEnd == true )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2923 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2924 subexons[j].canBeEnd = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2925 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2926 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2927 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2928 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2929 //printf( "%d: %d %lf\n", subexons[i].canBeStart, subexons[i].prevCnt, subexons[i].leftClassifier ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2930 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2931
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2932 // Go through the cases of mixture region to set canBeStart/End.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2933 // e.g: +[...]+_____+[....]-...]+____+[..)_____-[...]-
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2934 // ^ then we need to force a start point here.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2935 // Do we need to associate a strand information with canBeStart, canBeEnd?
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2936 for ( i = 0 ; i < seCnt ; )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2937 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2938 // [i, j) is a region.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2939 for ( j = i + 1 ; j < seCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2940 if ( subexons[j].start > subexons[j - 1].end + 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2941 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2942 if ( subexons[i].canBeStart == false ) // then subexons[i] must has a hard left boundary.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2943 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2944 int leftStrandCnt[2] = {0, 0} ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2945 for ( k = i ; k < j ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2946 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2947 if ( !SubexonGraph::IsSameStrand( subexons[k].rightStrand, subexons[i].leftStrand ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2948 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2949 if ( subexons[k].leftStrand != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2950 ++leftStrandCnt[ ( subexons[k].leftStrand + 1 ) / 2 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2951 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2952 if ( k < j && leftStrandCnt[ ( subexons[k].rightStrand + 1 ) / 2 ] == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2953 subexons[i].canBeStart = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2954 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2955
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2956 if ( subexons[j - 1].canBeEnd == false )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2957 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2958 int rightStrandCnt[2] = {0, 0} ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2959 for ( k = j - 1 ; k >= i ; --k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2960 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2961 if ( !SubexonGraph::IsSameStrand( subexons[k].leftStrand, subexons[j - 1].rightStrand ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2962 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2963 if ( subexons[k].rightStrand != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2964 ++rightStrandCnt[ ( subexons[k].rightStrand + 1 ) / 2 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2965 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2966 if ( k >= i && rightStrandCnt[ ( subexons[k].leftStrand + 1 ) / 2 ] == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2967 subexons[j - 1].canBeEnd = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2968 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2969
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2970 //if ( subexons[i].start == 6870264)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2971 // printf( "hi %d %d\n",i , subexons[i].canBeStart ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2972 i = j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2973 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2974 /*for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2975 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2976 printf( "%d %d: %d %d\n", subexons[i].start, subexons[i].end, subexons[i].canBeStart, subexons[i].canBeEnd ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2977 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2978
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2979 // Find the gene ids.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2980 baseGeneId = subexons[0].lcCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2981 usedGeneId = subexons[0].rcCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2982 defaultGeneId[0] = defaultGeneId[1] = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2983 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2984 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2985 if ( subexons[i].geneId < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2986 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2987
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2988 //if ( baseGeneId == -1 || subexons[i].geneId < baseGeneId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2989 // baseGeneId = subexons[i].geneId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2990 //if ( subexons[i].geneId > usedGeneId )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2991 // usedGeneId = subexons[i].geneId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2992
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2993 if ( ( subexons[i].rightStrand == -1 || subexons[i].leftStrand == -1 ) &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2994 ( defaultGeneId[0] == -1 || subexons[i].geneId < defaultGeneId[0] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2995 defaultGeneId[0] = subexons[i].geneId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2996 if ( ( subexons[i].rightStrand == 1 || subexons[i].leftStrand == 1 ) &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2997 ( defaultGeneId[1] == -1 || subexons[i].geneId < defaultGeneId[1] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2998 defaultGeneId[1] = subexons[i].geneId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2999 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3000 if ( defaultGeneId[0] == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3001 defaultGeneId[0] = baseGeneId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3002 if ( defaultGeneId[1] == -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3003 defaultGeneId[1] = usedGeneId - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3004
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3005 // Go through the constraints to find the chain of subexons that should be kept.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3006 std::map<int, int> *subexonChainSupport = new std::map<int, int>[ seCnt ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3007 for ( i = 0 ; i < sampleCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3008 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3009 std::vector<int> subexonIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3010 std::vector<struct _pair32> chain ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3011
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3012 int tcCnt = constraints[i].constraints.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3013 int size ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3014 for ( j = 0 ; j < tcCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3015 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3016 struct _constraint c = constraints[i].constraints[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3017 if ( c.uniqSupport < 0.95 * c.support || c.support < 3 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3018 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3019
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3020 subexonIdx.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3021 c.vector.GetOnesIndices( subexonIdx ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3022 size = subexonIdx.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3023
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3024 for ( k = 0 ; k < size - 1 ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3025 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3026 struct _pair32 p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3027
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3028 p.a = subexonIdx[k] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3029 p.b = subexonIdx[k + 1] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3030 //if ( subexons[p.a].end + 1 == 113235898 && subexons[ p.b ].start + 1 == 113236121 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3031 // printf( "bad bad %d %d %d\n", i, c.uniqSupport, c.support ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3032
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3033 if ( subexons[ p.a ].end + 1 < subexons[ p.b ].start )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3034 chain.push_back( p ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3035 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3036 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3037 // Remove redundancy.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3038 sort( chain.begin(), chain.end(), CompSortPairs ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3039 size = chain.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3040 k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3041 for ( j = 1 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3042 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3043 if ( chain[j].a == chain[k].a && chain[j].b == chain[k].b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3044 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3045 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3046 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3047 ++k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3048 chain[k] = chain[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3049 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3050 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3051 chain.resize( k + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3052
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3053 // Add those to sample count
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3054 size = k + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3055 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3056 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3057 if ( subexonChainSupport[ chain[j].a ].count( chain[j].b ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3058 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3059 ++subexonChainSupport[ chain[j].a ][ chain[j].b ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3060 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3061 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3062 subexonChainSupport[ chain[j].a ][ chain[j].b ] = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3063 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3064 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3065
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3066 /*for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3067 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3068 printf( "%d:", i ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3069 for ( std::map<int, int>::iterator it = subexonChainSupport[i].begin() ; it != subexonChainSupport[i].end() ; ++it )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3070 printf( " (%d %d) ", it->first, it->second ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3071 printf( "\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3072 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3073
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3074 //printf( "%d %d %d\n", defaultGeneId[0], baseGeneId, usedGeneId ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3075 cnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3076 memset( f, -1, sizeof( int ) * seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3077 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3078 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3079 if ( subexons[i].canBeStart )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3080 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3081 cnt += SubTranscriptCount( i, subexons, f ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3082 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3083 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3084 if ( cnt <= USE_DP )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3085 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3086 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3087 if ( f[i] > USE_DP )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3088 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3089 useDP = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3090 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3091 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3092 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3093 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3094 useDP = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3095 if ( !useDP )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3096 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3097 for ( i = 0 ; i < sampleCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3098 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3099 double msize = constraints[i].matePairs.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3100 double csize = constraints[i].constraints.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3101 if ( cnt > ( csize / msize ) * ( csize / msize ) * seCnt
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3102 && cnt > USE_DP / ( msize * msize ) && cnt > 50 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3103 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3104 useDP = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3105 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3106 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3107 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3108 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3109
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3110 int atCnt = cnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3111 printf( "%d: atCnt=%d seCnt=%d %d %d %d\n", subexons[0].start + 1, atCnt, seCnt, useDP, (int)constraints[0].constraints.size(), (int)constraints[0].matePairs.size() ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3112 fflush( stdout ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3113 std::vector<struct _transcript> alltranscripts ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3114
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3115 if ( !useDP )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3116 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3117 int origSize = atCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3118 alltranscripts.resize( atCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3119 for ( i = 0 ; i < atCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3120 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3121 alltranscripts[i].seVector.Init( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3122 alltranscripts[i].correlationScore = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3123 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3124
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3125 atCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3126 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3127 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3128 if ( subexons[i].canBeStart )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3129 EnumerateTranscript( i, 0, f, 0, subexons, subexonCorrelation, 1, alltranscripts, atCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3130 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3131
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3132 for ( i = atCnt ; i < origSize ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3133 alltranscripts[i].seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3134
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3135 alltranscripts.resize( atCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3136 //printf( "transcript cnt: %d\n", atCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3137 //printf( "%d %d\n", alltranscripts[0].seVector.Test( 1 ), constraints[0].matePairs.size() ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3138 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3139 else // Use dynamic programming to pick a set of candidate transcript.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3140 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3141 std::vector<struct _transcript> sampleTranscripts ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3142
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3143 // pre allocate the memory.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3144 struct _dpAttribute attr ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3145 attr.f1 = new struct _dp[seCnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3146 if ( seCnt <= 10000 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3147 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3148 attr.f2 = new struct _dp*[seCnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3149 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3150 attr.f2[i] = new struct _dp[seCnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3151 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3152 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3153 attr.f2 = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3154
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3155 hashMax = HASH_MAX ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3156 if (seCnt > 500)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3157 hashMax = 1000003 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3158 else if (seCnt > 1000)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3159 hashMax = 10000019 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3160 else if (seCnt > 1500)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3161 hashMax = 20000003 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3162
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3163 attr.hash = dpHash ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3164 if ( hashMax != HASH_MAX )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3165 attr.hash = new struct _dp[hashMax] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3166
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3167 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3168 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3169 attr.f1[i].seVector.Nullify() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3170 attr.f1[i].seVector.Init( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3171 for ( j = i ; j < seCnt ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3172 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3173 attr.f2[i][j].seVector.Nullify() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3174 attr.f2[i][j].seVector.Init( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3175 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3176 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3177 for ( i = 0 ; i < hashMax ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3178 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3179 attr.hash[i].seVector.Nullify() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3180 attr.hash[i].seVector.Init( seCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3181 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3182
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3183 // select candidate transcripts from each sample.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3184 struct _pair32 *sampleComplexity = new struct _pair32[ sampleCnt ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3185 for ( i = 0 ; i < sampleCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3186 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3187 sampleComplexity[i].a = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3188 sampleComplexity[i].b = constraints[i].constraints.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3189 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3190 qsort( sampleComplexity, sampleCnt, sizeof( sampleComplexity[0] ), CompPairsByB ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3191 int downsampleCnt = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3192
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3193 for ( i = sampleCnt - 1 ; i >= 0 ; --i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3194 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3195 sampleTranscripts.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3196 int iterBound = constraints[ sampleComplexity[i].a ].constraints.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3197 if ( i < sampleCnt - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3198 iterBound = 100 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3199
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3200 if ( i < sampleCnt - 10 && alltranscripts.size() > 1000 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3201 iterBound = 10 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3202 //printf( "%d %d: %d %d %d %d\n", subexons[0].start + 1, sampleComplexity[i].a, constraints[ sampleComplexity[i].a ].constraints.size(), constraints[ sampleComplexity[i].a ].matePairs.size(),
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3203 // alltranscripts.size(), iterBound ) ; fflush( stdout ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3204 if ( maxDpConstraintSize > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3205 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3206 Constraints truncatedConstraints ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3207 truncatedConstraints.TruncateConstraintsCoverFrom( constraints[ sampleComplexity[i].a ], seCnt, maxDpConstraintSize ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3208 PickTranscriptsByDP( subexons, seCnt, iterBound, truncatedConstraints,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3209 subexonCorrelation, attr, sampleTranscripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3210 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3211 else if ( ( constraints[ sampleComplexity[i].a ].constraints.size() > 1000
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3212 && constraints[ sampleComplexity[i].a ].constraints.size() * 10 < constraints[ sampleComplexity[i].a ].matePairs.size() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3213 || ( downsampleCnt > 0 && (int)constraints[ sampleComplexity[i].a ].constraints.size() >= downsampleCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3214 || seCnt >= 1500 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3215 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3216 Constraints downsampledConstraints ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3217 int stride = (int)constraints[ sampleComplexity[i].a ].matePairs.size() / (int)constraints[ sampleComplexity[i].a ].constraints.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3218 if ( downsampleCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3219 stride = (int)constraints[ sampleComplexity[i].a ].constraints.size() / downsampleCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3220 if ( stride < 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3221 stride = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3222 downsampledConstraints.DownsampleConstraintsFrom( constraints[ sampleComplexity[i].a ], stride ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3223 if ( downsampleCnt <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3224 downsampleCnt = downsampledConstraints.constraints.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3225 if ( iterBound <= 10 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3226 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3227 PickTranscriptsByDP( subexons, seCnt, iterBound, downsampledConstraints, subexonCorrelation, attr, sampleTranscripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3228 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3229 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3230 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3231 PickTranscriptsByDP( subexons, seCnt, iterBound, constraints[ sampleComplexity[i].a ], subexonCorrelation, attr, sampleTranscripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3232 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3233 int size = sampleTranscripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3234 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3235 alltranscripts.push_back( sampleTranscripts[j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3236
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3237 // we can further pick a smaller subset of transcripts here if the number is still too big.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3238 CoalesceSameTranscripts( alltranscripts ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3239
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3240 AugmentTranscripts( subexons, alltranscripts, 1000, false ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3241 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3242
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3243 // release the memory.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3244 delete[] sampleComplexity ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3245 for ( i = 0 ; i < seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3246 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3247 attr.f1[i].seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3248 for ( j = i ; j < seCnt && attr.f2 ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3249 attr.f2[i][j].seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3250 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3251 for ( i = 0 ; i < hashMax ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3252 attr.hash[i].seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3253
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3254 delete[] attr.f1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3255 for ( i = 0 ; i < seCnt && attr.f2 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3256 delete[] attr.f2[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3257 delete[] attr.f2 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3258 if (hashMax != HASH_MAX)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3259 delete[] attr.hash ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3260
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3261 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3262
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3263 transcriptId = new int[usedGeneId - baseGeneId] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3264 std::vector<struct _transcript> *predTranscripts = new std::vector<struct _transcript>[sampleCnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3265
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3266 atCnt = alltranscripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3267 for ( i = 0 ; i < atCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3268 alltranscripts[i].FPKM = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3269
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3270 for ( i = 0 ; i < sampleCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3271 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3272 int size = alltranscripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3273 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3274 alltranscripts[j].abundance = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3275 //printf( "pick: %d: %d %d\n", i, constraints[i].matePairs.size(), alltranscripts.size() ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3276 PickTranscripts( subexons, alltranscripts, constraints[i], subexonCorrelation, predTranscripts[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3277
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3278 /*double tmp = FPKMFraction ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3279 FPKMFraction = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3280 size = predTranscripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3281 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3282 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3283 ConvertTranscriptAbundanceToFPKM( subexons, predTranscripts[j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3284 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3285 RefineTranscripts( subexons, seCnt, predTranscripts, constraints[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3286 FPKMFraction = tmp ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3287
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3288 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3289
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3290 atCnt = alltranscripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3291 int *txptSampleSupport = new int[atCnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3292 memset( txptSampleSupport, 0, sizeof( int ) * atCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3293 for ( i = 0 ; i < sampleCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3294 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3295 int size = predTranscripts[i].size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3296 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3297 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3298 ++txptSampleSupport[ predTranscripts[i][j].id ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3299 ++alltranscripts[ predTranscripts[i][j].id ].FPKM ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3300 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3301 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3302
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3303 for ( i = 0 ; i < sampleCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3304 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3305 int size = alltranscripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3306 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3307 alltranscripts[j].abundance = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3308 //printf( "pick: %d: %d %d\n", i, constraints[i].matePairs.size(), alltranscripts.size() ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3309
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3310 size = predTranscripts[i].size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3311 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3312 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3313 predTranscripts[i][j].seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3314 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3315 predTranscripts[i].clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3316 PickTranscripts( subexons, alltranscripts, constraints[i], subexonCorrelation, predTranscripts[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3317 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3318
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3319 std::vector<int> *rawPredTranscriptIds = new std::vector<int>[sampleCnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3320 std::vector<double> *rawPredTranscriptAbundance = new std::vector<double>[sampleCnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3321 for ( i = 0 ; i < sampleCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3322 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3323 int size = predTranscripts[i].size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3324
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3325 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3326 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3327 rawPredTranscriptIds[i].push_back( predTranscripts[i][j].id ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3328 rawPredTranscriptAbundance[i].push_back( predTranscripts[i][j].abundance ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3329 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3330 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3331
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3332 // Do the filtration.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3333 for ( i = 0 ; i < sampleCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3334 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3335 int size = predTranscripts[i].size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3336 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3337 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3338 ConvertTranscriptAbundanceToFPKM( subexons, predTranscripts[i][j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3339 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3340 size = RefineTranscripts( subexons, seCnt, false, subexonChainSupport, txptSampleSupport, predTranscripts[i], constraints[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3341
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3342 // Recompute the abundance.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3343 AbundanceEstimation( subexons, seCnt, constraints[i], predTranscripts[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3344 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3345 ConvertTranscriptAbundanceToFPKM( subexons, predTranscripts[i][j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3346 size = RefineTranscripts( subexons, seCnt, true, subexonChainSupport, txptSampleSupport, predTranscripts[i], constraints[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3347
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3348 //ComputeTranscriptsScore( subexons, seCnt, subexonChainSupport, predTranscripts[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3349 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3350
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3351 // Rescue some filtered transcripts
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3352 memset( txptSampleSupport, 0, sizeof( int ) * atCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3353 for ( i = 0 ; i < sampleCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3354 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3355 int size = predTranscripts[i].size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3356 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3357 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3358 ++txptSampleSupport[ predTranscripts[i][j].id ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3359 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3360 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3361
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3362 bool *predicted = new bool[atCnt] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3363 for ( i = 0 ; i < sampleCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3364 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3365 memset( predicted, false, sizeof( bool ) * atCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3366 if ( predTranscripts[i].size() != rawPredTranscriptIds[i].size() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3367 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3368 int psize = predTranscripts[i].size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3369 int rsize = rawPredTranscriptIds[i].size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3370 int tcCnt = constraints[i].matePairs.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3371
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3372 for ( j = 0 ; j < psize ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3373 predicted[ predTranscripts[i][j].id ] = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3374
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3375 for ( j = 0 ; j < rsize ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3376 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3377 int id = rawPredTranscriptIds[i][j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3378 if ( predicted[ id ] == false &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3379 ( txptSampleSupport[ id ] >= 3 && txptSampleSupport[id] >= 0.25 * sampleCnt ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3380 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3381 struct _transcript nt = alltranscripts[id] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3382 nt.seVector.Nullify() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3383 nt.seVector.Duplicate( alltranscripts[id].seVector ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3384 nt.constraintsSupport = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3385 nt.correlationScore = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3386 nt.abundance = rawPredTranscriptAbundance[i][j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3387 nt.id = id ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3388 predTranscripts[i].push_back( nt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3389 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3390 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3391 if ( psize != predTranscripts[i].size() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3392 AbundanceEstimation( subexons, seCnt, constraints[i], predTranscripts[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3393 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3394
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3395 int size = predTranscripts[i].size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3396
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3397 if ( 0 ) //size == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3398 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3399 //AugmentTranscripts( subexons, predTranscripts[i], false ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3400
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3401 int l = predTranscripts[i].size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3402 int tcCnt = constraints[i].matePairs.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3403 for ( j = 0 ; j < l ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3404 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3405 predTranscripts[i][j].abundance = 1.0 / alignments.readLen ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3406 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3407 AbundanceEstimation( subexons, seCnt, constraints[i], predTranscripts[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3408
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3409 std::vector<int> subexonIdx ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3410 for ( j = 0 ; j < l ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3411 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3412 subexonIdx.clear() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3413 predTranscripts[i][j].seVector.GetOnesIndices( subexonIdx ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3414 int subexonIdxCnt = subexonIdx.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3415 int len = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3416 for ( k = 0 ; k < subexonIdxCnt ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3417 len += subexons[ subexonIdx[k] ].end - subexons[ subexonIdx[k] ].start + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3418
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3419 if ( predTranscripts[i][j].abundance * alignments.readLen / len < 2.0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3420 predTranscripts[i][j].abundance = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3421 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3422 ConvertTranscriptAbundanceToFPKM( subexons, predTranscripts[i][j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3423
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3424 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3425 RemoveNegativeAbundTranscripts( predTranscripts[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3426 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3427
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3428 // Output
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3429 size = predTranscripts[i].size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3430 InitTranscriptId() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3431 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3432 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3433 OutputTranscript( i, subexons, predTranscripts[i][j] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3434 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3435 for ( j = 0 ; j < size ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3436 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3437 predTranscripts[i][j].seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3438 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3439 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3440
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3441 delete []predicted ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3442 delete []transcriptId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3443 delete []predTranscripts ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3444 delete []rawPredTranscriptIds ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3445 delete []rawPredTranscriptAbundance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3446 delete []txptSampleSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3447
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3448 atCnt = alltranscripts.size() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3449 for ( i = 0 ; i < atCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3450 alltranscripts[i].seVector.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3451 compatibleTestVectorT.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3452 compatibleTestVectorC.Release() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3453 delete[] f ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3454 delete[] subexonChainSupport ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3455 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3456 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3457
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3458 void *TranscriptDeciderSolve_Wrapper( void *a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3459 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3460 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3461
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3462 struct _transcriptDeciderThreadArg &arg = *( (struct _transcriptDeciderThreadArg *)a ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3463 TranscriptDecider transcriptDecider( arg.FPKMFraction, arg.classifierThreshold, arg.txptMinReadDepth, arg.sampleCnt, *( arg.alignments ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3464 transcriptDecider.SetNumThreads( arg.numThreads + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3465 transcriptDecider.SetMultiThreadOutputHandler( arg.outputHandler ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3466 transcriptDecider.SetMaxDpConstraintSize( arg.maxDpConstraintSize ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3467 transcriptDecider.Solve( arg.subexons, arg.seCnt, arg.constraints, arg.subexonCorrelation ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3468
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3469 int start = arg.subexons[0].start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3470 int end = arg.subexons[ arg.seCnt - 1 ].end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3471 int chrId = arg.subexons[0].chrId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3472 // Release memory
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3473 for ( i = 0 ; i < arg.seCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3474 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3475 delete[] arg.subexons[i].prev ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3476 delete[] arg.subexons[i].next ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3477 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3478 delete[] arg.subexons ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3479
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3480 // Put the work id back to the free threads queue.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3481 pthread_mutex_lock( arg.ftLock ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3482 arg.freeThreads[ *( arg.ftCnt ) ] = arg.tid ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3483 ++*( arg.ftCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3484 if ( *( arg.ftCnt ) == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3485 pthread_cond_signal( arg.fullWorkCond ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3486 pthread_mutex_unlock( arg.ftLock) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3487 printf( "Thread %d: %s %d %d finished.\n", arg.tid, arg.alignments->GetChromName(chrId), start + 1, end + 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3488 fflush( stdout ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3489
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3490
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3491 pthread_exit( NULL ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3492 }