annotate PsiCLASS-1.0.2/alignments.hpp @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 // The class handles reading the bam file
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 #ifndef _LSONG_RSCAF_ALIGNMENT_HEADER
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 #define _LSONG_RSCAF_ALIGNMENT_HEADER
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 #include "samtools-0.1.19/sam.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 #include <map>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 #include <string>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 #include <assert.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 #include <iostream>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 #include <stdlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 #include <math.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 #include "defs.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 class Alignments
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 private:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 samfile_t *fpSam ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 bam1_t *b ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 char fileName[1024] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 bool opened ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 std::map<std::string, int> chrNameToId ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 bool allowSupplementary ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 bool allowClip ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 bool hasClipHead, hasClipTail ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 int segmentsSum ; // the sum of segments.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 bool atBegin ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 bool atEnd ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 static int CompInt( const void *p1, const void *p2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 return (*(int *)p1 ) - (*(int *)p2 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 void Open()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 fpSam = samopen( fileName, "rb", 0 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 if ( !fpSam->header )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 fprintf( stderr, "Can not open %s.\n", fileName ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 exit( 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 // Collect the chromosome information
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 for ( int i = 0 ; i < fpSam->header->n_targets ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 std::string s( fpSam->header->target_name[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 chrNameToId[s] = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 opened = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 atBegin = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 atEnd = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 public:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 struct _pair segments[MAX_SEG_COUNT] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 int segCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 int totalReadCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 int fragLen, fragStdev ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 int readLen ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 bool matePaired ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 Alignments()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 b = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 opened = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 atBegin = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 atEnd = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 allowSupplementary = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 allowClip = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 totalReadCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 fragLen = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 fragStdev = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 readLen = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 matePaired = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 ~Alignments()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 if ( b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 bam_destroy1( b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 void Open( char *file )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 strcpy( fileName, file ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 Open() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 void Rewind()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 Close() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 Open() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 atBegin = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 atEnd = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 void Close()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 samclose( fpSam ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 fpSam = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 bool IsOpened()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 return opened ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 bool IsAtBegin()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 return atBegin ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 bool IsAtEnd()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 return atEnd ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 bool HasClip()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 return hasClipHead || hasClipTail ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128 bool HasClipHead()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 return hasClipHead ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 bool HasClipTail()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 return hasClipTail ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 int Next()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 int start = 0, len = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 uint32_t *rawCigar ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 if ( atBegin == true )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 totalReadCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 atBegin = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 while ( 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 while ( 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 if ( b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 bam_destroy1( b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 b = bam_init1() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155 if ( samread( fpSam, b ) <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157 atEnd = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 if ( ( b->core.flag & 0x900 ) == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162 ++totalReadCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 if ( b->core.flag & 0xC )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 //if ( ( b->core.flag & 0x900 ) == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171 // to many repeat.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 if ( bam_aux_get( b, "NH" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174 if ( bam_aux2i( bam_aux_get( b, "NH" ) ) >= 5 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178 // Compute the exons segments from the reads
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 segCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 start = b->core.pos ; //+ 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 rawCigar = bam1_cigar( b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 // Check whether the query length is compatible with the read
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 if ( bam_cigar2qlen( &b->core, rawCigar ) != b->core.l_qseq )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 bool clipMiddle = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187 int clipSum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188 hasClipHead = hasClipTail = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189 len = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 segmentsSum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 for ( i = 0 ; i < b->core.n_cigar ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193 int op = rawCigar[i] & BAM_CIGAR_MASK ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194 int num = rawCigar[i] >> BAM_CIGAR_SHIFT ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196 switch ( op )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198 case BAM_CMATCH:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199 case BAM_CDEL:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200 len += num ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201 case BAM_CSOFT_CLIP:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202 case BAM_CHARD_CLIP:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203 case BAM_CPAD:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 if ( i == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206 hasClipHead = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207 else if ( i == b->core.n_cigar - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
208 hasClipTail = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
209 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
210 clipMiddle = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
211
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
212 clipSum += num ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
213 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
214 case BAM_CINS:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
215 num = 0 ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
216 case BAM_CREF_SKIP:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
217 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
218 segments[ segCnt ].a = start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
219 segments[ segCnt ].b = start + len - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
220 ++segCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
221 segmentsSum += len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
222 start = start + len + num ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
223 len = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
224 } break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
225 default:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
226 len += num ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
227 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
228 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
229 if ( clipMiddle ) // should never happend
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
230 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
231
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
232 if ( clipSum >= 2 && !allowClip )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
233 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
234
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
235 if ( len > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
236 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
237 segments[ segCnt ].a = start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
238 segments[ segCnt ].b = start + len - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
239 ++segCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
240 segmentsSum += len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
241 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
242 /*if ( !strcmp( bam1_qname( b ), "chr1:109656301-109749401W:ENST00000490758.2:381:1480:1090:1290:X" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
243 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
244 for ( i = 0 ; i < segCnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
245 printf( "(%d %d) ", segments[i].a, segments[i].b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
246 printf( "\n" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
247 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
248
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
249 // Check whether the mates are compatible
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
250 //int mChrId = b->core.mtid ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
251 int64_t mPos = b->core.mpos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
252
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
253 if ( b->core.mtid == b->core.tid )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
254 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
255 for ( i = 0 ; i < segCnt - 1 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
256 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
257 if ( mPos >= segments[i].b && mPos <= segments[i + 1].a )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
258 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
259 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
260 if ( i < segCnt - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
261 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
262 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
263
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
264 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
265 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
266
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
267 return 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
268 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
269
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
270
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
271 int GetChromId()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
272 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
273 return b->core.tid ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
274 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
275
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
276 char* GetChromName( int tid )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
277 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
278 return fpSam->header->target_name[ tid ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
279 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
280
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
281 int GetChromIdFromName( const char *s )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
282 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
283 std::string ss( s ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
284 if ( chrNameToId.find( ss ) == chrNameToId.end() )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
285 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
286 printf( "Unknown genome name: %s\n", s ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
287 exit( 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
288 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
289 return chrNameToId[ss] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
290 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
291
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
292 int GetChromLength( int tid )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
293 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
294 return fpSam->header->target_len[ tid ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
295 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
296
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
297 int GetChromCount()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
298 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
299 return fpSam->header->n_targets ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
300 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
301
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
302 void GetMatePosition( int &chrId, int64_t &pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
303 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
304 if ( b->core.flag & 0x8 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
305 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
306 chrId = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
307 pos = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
308 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
309 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
310 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
311 chrId = b->core.mtid ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
312 pos = b->core.mpos ; //+ 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
313 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
314 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
315
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
316 int GetRepeatPosition( int &chrId, int64_t &pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
317 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
318 // Look at the CC field.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
319 if ( !bam_aux_get( b, "CC" ) || !bam_aux_get( b, "CP" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
320 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
321 chrId = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
322 pos = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
323 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
324 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
325
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
326 std::string s( bam_aux2Z( bam_aux_get(b, "CC" ) ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
327 chrId = chrNameToId[ s ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
328 pos = bam_aux2i( bam_aux_get( b, "CP" ) ) ;// Possible error for 64bit
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
329 return 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
330 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
331
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
332 void GetFileName( char *buffer )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
333 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
334 strcpy( buffer, fileName ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
335 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
336
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
337 int GetReadLength()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
338 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
339 return b->core.l_qseq ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
340 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
341
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
342 int GetRefCoverLength()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
343 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
344 return segmentsSum ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
345 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
346
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
347 bool IsFirstMate()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
348 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
349 if ( b->core.flag & 0x40 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
350 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
351 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
352 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
353
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
354 bool IsReverse()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
355 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
356 if ( b->core.flag & 0x10 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
357 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
358 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
359 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
360
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
361 bool IsMateReverse()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
362 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
363 if ( b->core.flag & 0x20 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
364 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
365 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
366 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
367
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
368 char *GetReadId()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
369 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
370 return bam1_qname( b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
371 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
372
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
373 bool IsUnique()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
374 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
375 if ( bam_aux_get( b, "NH" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
376 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
377 if ( bam_aux2i( bam_aux_get( b, "NH" ) ) > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
378 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
379 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
380 if ( IsSupplementary() && GetFieldZ( (char *)"XZ" ) != NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
381 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
382 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
383 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
384
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
385 bool IsPrimary()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
386 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
387 if ( ( b->core.flag & 0x900 ) == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
388 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
389 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
390 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
391 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
392
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
393 // -1:minus, 0: unknown, 1:plus
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
394 int GetStrand()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
395 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
396 if ( segCnt == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
397 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
398 if ( bam_aux_get( b, "XS" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
399 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
400 if ( bam_aux2A( bam_aux_get( b, "XS" ) ) == '-' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
401 return -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
402 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
403 return 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
404 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
405 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
406 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
407 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
408
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
409 int GetFieldI( char *f )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
410 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
411 if ( bam_aux_get( b, f ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
412 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
413 return bam_aux2i( bam_aux_get( b, f ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
414 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
415 return -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
416 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
417
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
418 char *GetFieldZ( char *f )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
419 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
420 if ( bam_aux_get( b, f ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
421 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
422 return bam_aux2Z( bam_aux_get( b, f ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
423 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
424 return NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
425 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
426
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
427 int GetNumberOfHits()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
428 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
429 if ( bam_aux_get( b, "NH" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
430 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
431 return bam_aux2i( bam_aux_get( b, "NH" ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
432 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
433 return 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
434 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
435
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
436 bool IsSupplementary()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
437 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
438 if ( ( b->core.flag & 0x800 ) == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
439 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
440 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
441 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
442 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
443
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
444 void SetAllowSupplementary( bool in )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
445 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
446 allowSupplementary = in ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
447 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
448
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
449 void SetAllowClip( bool in )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
450 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
451 allowClip = in ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
452 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
453
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
454 bool IsGCRich( bool threshold = 0.9 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
455 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
456 int i = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
457 int gcCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
458 for ( i = 0 ; i < b->core.l_qseq ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
459 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
460 int bit = bam1_seqi( bam1_seq( b ), i ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
461 if ( bit == 2 || bit == 4 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
462 ++gcCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
463 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
464 if ( gcCnt >= threshold * b->core.l_qseq )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
465 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
466 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
467
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
468 void GetGeneralInfo( bool stopEarly = false )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
469 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
470 int i, k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
471
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
472 const int sampleMax = 1000000 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
473 int *lens = new int[sampleMax] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
474 int *mateDiff = new int[sampleMax] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
475 int lensCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
476 int mateDiffCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
477 bool end = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
478
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
479 while ( 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
480 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
481 while ( 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
482 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
483 if ( b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
484 bam_destroy1( b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
485 b = bam_init1() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
486
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
487 if ( samread( fpSam, b ) <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
488 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
489 end = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
490 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
491 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
492 if ( b->core.flag & 0xC )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
493 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
494
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
495 if ( ( b->core.flag & 0x900 ) == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
496 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
497 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
498 if ( end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
499 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
500
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
501 if ( lensCnt < sampleMax )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
502 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
503 lens[ lensCnt ] = b->core.l_qseq ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
504 ++lensCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
505 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
506
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
507 if ( mateDiffCnt < sampleMax && b->core.tid == b->core.mtid
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
508 && b->core.pos < b->core.mpos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
509 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
510 mateDiff[ mateDiffCnt ] = b->core.mpos - b->core.pos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
511 ++mateDiffCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
512 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
513 ++totalReadCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
514 if ( totalReadCnt >= sampleMax && stopEarly )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
515 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
516 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
517
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
518 // Get the read length info and fragment length info
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
519 qsort( lens, lensCnt, sizeof( int ), CompInt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
520 readLen = lens[ lensCnt - 1 ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
521
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
522 if ( mateDiffCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
523 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
524 matePaired = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
525
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
526 qsort( mateDiff, mateDiffCnt, sizeof( int ), CompInt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
527 long long int sum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
528 long long int sumsq = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
529
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
530 for ( i = 0 ; i < mateDiffCnt * 0.7 ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
531 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
532 sum += ( mateDiff[i] + readLen );
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
533 sumsq += ( mateDiff[i] + readLen ) * ( mateDiff[i] + readLen ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
534 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
535 k = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
536 fragLen = (int)( sum / k ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
537 fragStdev = (int)sqrt( sumsq / k - fragLen * fragLen ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
538 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
539 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
540 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
541 fragLen = readLen ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
542 fragStdev = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
543 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
544 //printf( "readLen = %d\nfragLen = %d, fragStdev = %d\n", readLen, fragLen, fragStdev ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
545 delete[] lens ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
546 delete[] mateDiff ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
547 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
548 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
549 #endif