annotate PsiCLASS-1.0.2/FindJunction.cpp @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 /*
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2 Find junctions from the SAM file generated by Tophat2.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 The SAM file should be sorted.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 The junction is the start and the end coordinates of the spliced region in this program. The output of the junction is a bit different.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 Usage: ./a.out input [option] >output.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 ./a.out -h for help.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 #include <stdio.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 #include <string.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 #include <stdlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 #include "sam.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 #define LINE_SIZE 4097
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 #define QUEUE_SIZE 10001
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 #define HASH_MAX 1000003
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 struct _readTree
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 char id[256] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 int leftAnchor, rightAnchor ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 int pos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 //bool secondary ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 bool valid ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 int editDistance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 int NH, cnt ; // if cnt < NH, then it has real secondary match for this splice junction
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 struct _readTree *left, *right ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 int flag ;// The flag from sam head.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 // The structure of a junction
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 struct _junction
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 int start, end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 int readCnt ; // The # of reads containing this junction
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 int secReadCnt ; // The # of reads whose secondary alignment has this junction.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 char strand ; // On '+' or '-' strand
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 int leftAnchor, rightAnchor ; // The longest left and right anchor
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 int oppositeAnchor ; // The longest anchor of the shorter side.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 int uniqEditDistance, secEditDistance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 struct _readTree head ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 char line[LINE_SIZE] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 char col[11][LINE_SIZE] ; // The option fields is not needed.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 char strand ; // Extract XS field
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 char noncanonStrandInfo ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 //bool secondary ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 int NH ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 int editDistance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 int mateStart ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 int filterYS ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 int samFlag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 struct _junction junctionQueue[QUEUE_SIZE] ; // Expected only a few junctions in it for each read. This queue is sorted.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 int qHead, qTail ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 bool flagPrintJunction ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 bool flagPrintAll ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 bool flagStrict ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 int junctionCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 bool anchorBoth ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 bool validRead ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 int flank ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 struct _cigarSeg
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 int len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 char type ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 struct _readTree *contradictedReads ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 char nucToNum[26] = { 0, 4, 1, 4, 4, 4, 2,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 4, 4, 4, 4, 4, 4, 4,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 4, 4, 4, 4, 4, 3,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 4, 4, 4, 4, 4, 4 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 void PrintHelp()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 printf(
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 "Prints reads from the SAM/BAM file that containing junctions.\n"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 "Usage: ./a.out input [option]>output\n"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 "Options:\n"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 "\t-j xx [-B]: Output the junctions using 1-based coordinates. The format is \"reference id\" start end \"# of read\" strand.(They are sorted)\n and the xx is an integer means the maximum unqualified anchor length for a splice junction(default=8). If -B, the splice junction must be supported by a read whose both anchors are longer than xx.\n"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 "\t-a: Output all the junctions, and use non-positive support number to indicate unqualified junctions.\n"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 "\t-y: If the bits from YS field of bam matches the argument, we filter the alignment (default: 4).\n"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 void GetJunctionInfo( struct _junction &junc, struct _readTree *p )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 if ( p == NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 if ( p->valid )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 //if ( junc.start == 22381343 + 1 && junc.end == 22904987 - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 // printf( "%s %d %d %d\n", p->id, p->leftAnchor, p->rightAnchor, p->flag ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 if ( p->cnt < p->NH )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 junc.secEditDistance += p->editDistance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 ++junc.secReadCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 junc.uniqEditDistance += p->editDistance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 ++junc.readCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 int l = p->leftAnchor, r = p->rightAnchor ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 if ( !anchorBoth )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 if ( l > junc.leftAnchor )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 junc.leftAnchor = l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 if ( r > junc.rightAnchor )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 junc.rightAnchor = r ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 if ( l <= r && l > junc.oppositeAnchor )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 junc.oppositeAnchor = l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 else if ( r < l && r > junc.oppositeAnchor )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128 junc.oppositeAnchor = r ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 if ( l > flank && r > flank )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 junc.leftAnchor = l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 junc.rightAnchor = r ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 if ( l <= r && l > junc.oppositeAnchor )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 junc.oppositeAnchor = l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 else if ( r < l && r > junc.oppositeAnchor )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 junc.oppositeAnchor = r ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 GetJunctionInfo( junc, p->left ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 GetJunctionInfo( junc, p->right ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 void PrintJunctionReads( struct _junction &junc, struct _readTree *p )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 if ( p == NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 if ( p->valid )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 printf( "%s\n", p->id ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155 PrintJunctionReads( junc, p->left ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156 PrintJunctionReads( junc, p->right ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 void PrintJunction( char *chrome, struct _junction &junc )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 int sum ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162 junc.leftAnchor = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 junc.rightAnchor = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 junc.oppositeAnchor = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 junc.readCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166 junc.secReadCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 junc.uniqEditDistance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 junc.secEditDistance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 GetJunctionInfo( junc, &junc.head ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171 sum = junc.readCnt + junc.secReadCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 if ( junc.leftAnchor <= flank || junc.rightAnchor <= flank || ( junc.readCnt + junc.secReadCnt <= 0 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 if ( flagPrintAll )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177 sum = -sum ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 /*if ( junc.end - junc.start + 1 >= 200000 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 if ( junc.secReadCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 junc.secReadCnt = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189 /*if ( junc.oppositeAnchor <= ( ( flank / 2 < 1 ) ? flank / 2 : 1 ) && ( junc.readCnt + junc.secReadCnt ) <= 10 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 if ( junc.readCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 junc.readCnt = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193 if ( junc.secReadCnt > 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194 junc.secReadCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197 printf( "%s %d %d %d %c %d %d %d %d\n", chrome, junc.start - 1, junc.end + 1, sum, junc.strand,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198 junc.readCnt, junc.secReadCnt, junc.uniqEditDistance, junc.secEditDistance ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199 //PrintJunctionReads( junc, &junc.head ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202 void ClearReadTree( struct _readTree *p )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204 if ( p == NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 return ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206 ClearReadTree( p->left ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207 ClearReadTree( p->right ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
208 free( p ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
209 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
210
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
211 // Insert to the read tree
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
212 bool InsertReadTree( struct _readTree *p, char *id, int l, int r )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
213 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
214 int tmp = strcmp( p->id, id ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
215 if ( tmp == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
216 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
217 p->cnt += 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
218 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
219 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
220 else if ( tmp < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
221 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
222 if ( p->left )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
223 return InsertReadTree( p->left, id, l, r ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
224 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
225 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
226 p->left = (struct _readTree *)malloc( sizeof( struct _readTree ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
227 strcpy( p->left->id, id ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
228 p->left->leftAnchor = l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
229 p->left->rightAnchor = r ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
230 //p->left->secondary = secondary ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
231 p->left->editDistance = editDistance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
232 p->left->left = p->left->right = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
233 p->left->valid = validRead ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
234 p->left->cnt = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
235 p->left->NH = NH ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
236 p->left->flag = samFlag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
237 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
238 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
239 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
240 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
241 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
242 if ( p->right )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
243 return InsertReadTree( p->right, id, l, r ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
244 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
245 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
246 p->right = (struct _readTree *)malloc( sizeof( struct _readTree ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
247 strcpy( p->right->id, id ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
248 p->right->leftAnchor = l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
249 p->right->rightAnchor = r ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
250 //p->right->secondary = secondary ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
251 p->right->editDistance = editDistance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
252 p->right->left = p->right->right = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
253 p->right->valid = validRead ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
254 p->right->cnt = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
255 p->right->NH = NH ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
256 p->right->flag = samFlag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
257 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
258 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
259 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
260 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
261
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
262
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
263 bool SearchContradictedReads( struct _readTree *p, char *id, int pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
264 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
265 if ( p == NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
266 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
267 int tmp = strcmp( p->id, id ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
268 if ( tmp == 0 && p->pos == pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
269 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
270 else if ( tmp <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
271 return SearchContradictedReads( p->left, id, pos ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
272 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
273 return SearchContradictedReads( p->right, id, pos ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
274 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
275
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
276 bool InsertContradictedReads( struct _readTree *p, char *id, int pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
277 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
278 if ( p == NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
279 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
280 contradictedReads = (struct _readTree *)malloc( sizeof( struct _readTree ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
281 strcpy( contradictedReads->id, id ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
282 contradictedReads->pos = pos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
283 contradictedReads->left = contradictedReads->right = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
284 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
285 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
286 int tmp = strcmp( p->id, id ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
287 if ( tmp == 0 && p->pos == pos )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
288 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
289 else if ( tmp <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
290 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
291 if ( p->left )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
292 return InsertContradictedReads( p->left, id, pos ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
293 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
294 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
295 p->left = (struct _readTree *)malloc( sizeof( struct _readTree ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
296 strcpy( p->left->id, id ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
297 p->left->pos = pos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
298 p->left->left = p->left->right = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
299 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
300 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
301 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
302 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
303 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
304 if ( p->right )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
305 return InsertContradictedReads( p->right, id, pos ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
306 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
307 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
308 p->right = (struct _readTree *)malloc( sizeof( struct _readTree ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
309 strcpy( p->right->id, id ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
310 p->right->pos = pos ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
311 p->right->left = p->right->right = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
312 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
313 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
314 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
315 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
316
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
317 struct _readTree *GetReadTreeNode( struct _readTree *p, char *id )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
318 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
319 if ( p == NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
320 return NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
321 int tmp = strcmp( p->id, id ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
322 if ( tmp == 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
323 return p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
324 else if ( tmp < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
325 return GetReadTreeNode( p->left, id ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
326 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
327 return GetReadTreeNode( p->right, id ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
328 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
329
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
330 // Insert the new junction into the queue,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
331 // and make sure the queue is sorted.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
332 // Assume each junction range is only on one strand.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
333 // l, r is the left and right anchor from a read
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
334 void InsertQueue( int start, int end, int l, int r )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
335 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
336 int i, j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
337 i = qTail ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
338
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
339
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
340 while ( i != qHead )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
341 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
342 j = i - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
343 if ( j < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
344 j = QUEUE_SIZE - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
345
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
346 if ( junctionQueue[j].start < start
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
347 || ( junctionQueue[j].start == start && junctionQueue[j].end < end ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
348 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
349
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
350 junctionQueue[i] = junctionQueue[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
351 --i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
352
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
353 if ( i < 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
354 i = QUEUE_SIZE - 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
355 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
356
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
357 junctionQueue[i].start = start ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
358 junctionQueue[i].end = end ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
359 /*if ( !secondary )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
360 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
361 junctionQueue[i].readCnt = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
362 junctionQueue[i].secReadCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
363 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
364 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
365 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
366 junctionQueue[i].readCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
367 junctionQueue[i].secReadCnt = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
368 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
369 junctionQueue[i].strand = strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
370 junctionQueue[i].leftAnchor = l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
371 junctionQueue[i].rightAnchor = r ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
372
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
373 strcpy( junctionQueue[i].head.id, col[0] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
374 junctionQueue[i].head.valid = validRead ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
375 junctionQueue[i].head.leftAnchor = l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
376 junctionQueue[i].head.rightAnchor = r ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
377 junctionQueue[i].head.left = junctionQueue[i].head.right = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
378 //junctionQueue[i].head.secondary = secondary ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
379 junctionQueue[i].head.NH = NH ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
380 junctionQueue[i].head.cnt = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
381 junctionQueue[i].head.editDistance = editDistance ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
382
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
383 ++qTail ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
384 if ( qTail >= QUEUE_SIZE )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
385 qTail = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
386 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
387
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
388 // Search the queue, and remove the heads if its end(start) is smaller than prune.(Because it is impossible to have that junction again)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
389 // Add the junction to the queue, if it is not in it.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
390 // Return true, if it finds the junction. Otherwise, return false.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
391 bool SearchQueue( int start, int end, int prune, int l, int r )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
392 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
393 int i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
394
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
395 // Test whether this read might be a false alignment.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
396 i = qHead ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
397 while ( i != qTail )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
398 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
399 struct _readTree *rt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
400 if ( ( junctionQueue[i].start == start && junctionQueue[i].end < end ) ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
401 ( junctionQueue[i].start > start && junctionQueue[i].end == end ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
402 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
403 // This alignment is false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
404 rt = GetReadTreeNode( &junctionQueue[i].head, col[0] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
405 // the commented out logic because it is handled by contradicted reads
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
406 if ( rt != NULL ) //&& ( rt->flag & 0x40 ) != ( samFlag & 0x40 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
407 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
408 if ( rt->leftAnchor <= flank || rt->rightAnchor <= flank )//|| rt->secondary )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
409 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
410 if ( l > flank && r > flank )//&& !secondary )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
411 rt->valid = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
412 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
413 validRead = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
414 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
415 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
416 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
417 // Ignore this read
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
418 //return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
419 validRead = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
420 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
421 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
422 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
423 else if ( ( junctionQueue[i].start == start && junctionQueue[i].end > end ) ||
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
424 ( junctionQueue[i].start < start && junctionQueue[i].end == end ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
425 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
426 // This other alignment is false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
427 rt = GetReadTreeNode( &junctionQueue[i].head, col[0] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
428 //if ( rt != NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
429 if ( rt != NULL ) //&& ( rt->flag & 0x40 ) != ( samFlag & 0x40 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
430 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
431 if ( l <= flank || r <= flank )//|| secondary )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
432 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
433 if ( rt->leftAnchor > flank && rt->rightAnchor > flank )//&& !rt->secondary )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
434 validRead = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
435 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
436 rt->valid = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
437 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
438 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
439 rt->valid = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
440 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
441 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
442 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
443 if ( i >= QUEUE_SIZE )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
444 i = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
445 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
446
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
447 //if ( start == 8077108 && end == 8078439 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
448 // exit( 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
449 i = qHead ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
450
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
451 while ( i != qTail )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
452 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
453 if ( junctionQueue[i].start == start &&
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
454 junctionQueue[i].end == end )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
455 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
456 if (junctionQueue[i].strand == '?' && junctionQueue[i].strand != strand )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
457 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
458 junctionQueue[i].strand = strand ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
459 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
460 InsertReadTree( &junctionQueue[i].head, col[0], l, r ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
461 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
462 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
463
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
464 if ( junctionQueue[i].end < prune && i == qHead )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
465 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
466 // pop
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
467 PrintJunction( col[2], junctionQueue[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
468 ClearReadTree( junctionQueue[i].head.left ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
469 ClearReadTree( junctionQueue[i].head.right ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
470 ++qHead ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
471 if ( qHead >= QUEUE_SIZE )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
472 qHead = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
473 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
474 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
475 if ( i >= QUEUE_SIZE )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
476 i = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
477 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
478
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
479 InsertQueue( start, end, l, r ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
480 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
481 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
482
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
483 // Compute the junctions based on the CIGAR
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
484 bool CompareJunctions( int startLocation, char *cigar )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
485 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
486 int currentLocation = startLocation ; // Current location on the reference genome
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
487 int i, j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
488 int num ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
489 int newJuncCnt = 0 ; // The # of junctions in the read, and the # of new junctions among them.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
490
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
491 struct _cigarSeg cigarSeg[1000] ; // A segment of the cigar.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
492 int ccnt = 0 ; // cigarSeg cnt
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
493
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
494 j = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
495 num = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
496 validRead = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
497
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
498 for ( i = 0 ; cigar[i] ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
499 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
500 if ( cigar[i] >= '0' && cigar[i] <= '9' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
501 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
502 num = num * 10 + cigar[i] - '0' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
503 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
504 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
505 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
506 cigarSeg[ccnt].len = num ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
507 cigarSeg[ccnt].type = cigar[i] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
508 ++ccnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
509 num = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
510 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
511 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
512
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
513 // Filter low complex alignment.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
514 // Only applies this to alignments does not have a strand information.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
515 if ( strand == '?' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
516 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
517 if ( noncanonStrandInfo != -1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
518 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
519 if ( ( noncanonStrandInfo & filterYS ) != 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
520 validRead = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
521 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
522 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
523 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
524 int softStart = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
525 int softEnd = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
526 if ( cigarSeg[0].type == 'S' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
527 softStart = cigarSeg[0].len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
528 if ( cigarSeg[ ccnt - 1 ].type == 'S' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
529 softEnd = cigarSeg[ ccnt - 1 ].len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
530 int readLen = strlen( col[9] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
531 int count[5] = { 0, 0, 0, 0, 0 } ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
532
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
533 int pos = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
534 for ( i = 0 ; i < ccnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
535 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
536 switch ( cigarSeg[i].type )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
537 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
538 case 'S':
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
539 pos += cigarSeg[i].len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
540 case 'M':
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
541 case 'I':
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
542 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
543 for ( j = 0 ; j < cigarSeg[i].len ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
544 ++count[ nucToNum[ col[9][pos + j] - 'A' ] ] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
545 pos += j ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
546 } break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
547 case 'N':
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
548 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
549 int max = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
550 int sum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
551 for ( j = 0 ; j < 5 ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
552 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
553 if ( count[j] > max )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
554 max = count[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
555 sum += count[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
556 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
557 if ( max > 0.8 * sum )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
558 validRead = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
559 count[0] = count[1] = count[2] = count[3] = count[4] = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
560 } break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
561 case 'H':
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
562 case 'P':
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
563 case 'D':
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
564 default: break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
565 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
566 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
567
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
568 int max = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
569 int sum = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
570 for ( j = 0 ; j < 5 ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
571 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
572 if ( count[j] > max )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
573 max = count[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
574 sum += count[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
575 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
576 if ( max > 0.8 * sum )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
577 validRead = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
578 /* count[0] = count[1] = count[2] = count[3] = count[4] = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
579
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
580
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
581 for ( i = softStart + 1 ; i < readLen - softEnd ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
582 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
583 switch ( col[9][i] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
584 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
585 case 'A': ++count[0] ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
586 case 'C': ++count[1] ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
587 case 'G': ++count[2] ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
588 case 'T': ++count[3] ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
589 default: ++count[4] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
590 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
591 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
592 int max = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
593 for ( j = 0 ; j < 5 ; ++j )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
594 if ( count[j] > max )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
595 max = count[j] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
596 if ( max > 0.6 * ( readLen - softEnd - softStart - 1 ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
597 validRead = false ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
598 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
599 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
600
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
601 // Test whether contradict with mate pair
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
602 if ( col[6][0] == '=' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
603 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
604 currentLocation = startLocation ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
605 for ( i = 0 ; i < ccnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
606 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
607 if ( cigarSeg[i].type == 'I' || cigarSeg[i].type == 'S' || cigarSeg[i].type == 'H'
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
608 || cigarSeg[i].type == 'P' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
609 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
610 else if ( cigarSeg[i].type == 'N' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
611 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
612 if ( mateStart >= currentLocation && mateStart <= currentLocation + cigarSeg[i].len - 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
613 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
614 /*if ( cigarSeg[i].len == 91486 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
615 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
616 printf( "%s %d %d\n", currentLocation, mateStart ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
617 exit( 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
618 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
619 // ignore this read
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
620 //return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
621 InsertContradictedReads( contradictedReads, col[0], mateStart ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
622 validRead = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
623 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
624 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
625 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
626 currentLocation += cigarSeg[i].len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
627 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
628
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
629 i = qHead ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
630 // Search if it falls in the splices junction created by its mate.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
631 while ( i != qTail )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
632 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
633 if ( mateStart < junctionQueue[i].start && junctionQueue[i].start <= startLocation
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
634 && startLocation <= junctionQueue[i].end
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
635 && SearchContradictedReads( contradictedReads, col[0], startLocation ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
636 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
637 validRead = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
638 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
639 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
640 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
641 if ( i >= QUEUE_SIZE )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
642 i = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
643 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
644 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
645
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
646 currentLocation = startLocation ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
647 for ( i = 0 ; i < ccnt ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
648 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
649 if ( cigarSeg[i].type == 'I' || cigarSeg[i].type == 'S' || cigarSeg[i].type == 'H'
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
650 || cigarSeg[i].type == 'P' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
651 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
652 else if ( cigarSeg[i].type == 'N' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
653 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
654 int left, right ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
655 int tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
656 tmp = i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
657 while ( i > 0 && ( cigarSeg[i - 1].type == 'I' || cigarSeg[i - 1].type == 'D' ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
658 --i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
659 if ( i > 0 && cigarSeg[i - 1].type == 'M' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
660 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
661 left = cigarSeg[i - 1].len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
662 if ( i >= 2 && cigarSeg[i - 2].type == 'N' && left <= flank )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
663 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
664 left = flank + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
665 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
666 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
667 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
668 left = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
669
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
670 i = tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
671 while ( i < ccnt && ( cigarSeg[i + 1].type == 'I' || cigarSeg[i + 1].type == 'D' ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
672 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
673 if ( i < ccnt && cigarSeg[i + 1].type == 'M' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
674 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
675 right = cigarSeg[i + 1].len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
676 if ( i + 2 < ccnt && cigarSeg[i + 2].type == 'N' && right <= flank )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
677 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
678 right = flank + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
679 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
680 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
681 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
682 right = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
683 i = tmp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
684 if ( !SearchQueue( currentLocation, currentLocation + cigarSeg[i].len - 1, startLocation, left, right ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
685 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
686 ++newJuncCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
687 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
688 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
689 currentLocation += cigarSeg[i].len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
690 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
691 /*else if ( cigar[i] == 'I' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
692 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
693 num = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
694 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
695 else if ( cigar[i] == 'N' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
696 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
697 if ( !SearchQueue( currentLocation, currentLocation + num - 1, startLocation ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
698 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
699 ++newJuncCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
700 //if ( flagPrintJunction )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
701 // printf( "%s %d %d\n", col[2], currentLocation - 2, currentLocation + len - 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
702 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
703 currentLocation += num ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
704 num = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
705 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
706 else // Other operations, like M, D,...,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
707 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
708 currentLocation += num ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
709 num = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
710 }*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
711
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
712 junctionCnt += newJuncCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
713
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
714 if ( newJuncCnt )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
715 return true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
716 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
717 return false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
718 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
719
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
720 void cigar2string( bam1_core_t *c, uint32_t *in_cigar, char *out_cigar )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
721 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
722 int k, op, l ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
723 char opcode ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
724
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
725 *out_cigar = '\0' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
726 for ( k = 0 ; k < c->n_cigar ; ++k )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
727 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
728 op = in_cigar[k] & BAM_CIGAR_MASK ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
729 l = in_cigar[k] >> BAM_CIGAR_SHIFT ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
730 switch (op)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
731 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
732 case BAM_CMATCH: opcode = 'M' ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
733 case BAM_CINS: opcode = 'I' ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
734 case BAM_CDEL: opcode = 'D' ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
735 case BAM_CREF_SKIP: opcode = 'N' ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
736 case BAM_CSOFT_CLIP: opcode = 'S' ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
737 case BAM_CHARD_CLIP: opcode = 'H' ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
738 case BAM_CPAD: opcode = 'P' ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
739 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
740 sprintf( out_cigar + strlen( out_cigar ), "%d%c", l, opcode ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
741 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
742 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
743
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
744
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
745
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
746 int main( int argc, char *argv[] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
747 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
748 FILE *fp ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
749 samfile_t *fpsam ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
750 bam1_t *b = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
751 bool useSam = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
752
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
753 int i, len ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
754 int startLocation ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
755 bool flagRemove = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
756 char prevChrome[103] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
757
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
758 anchorBoth = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
759
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
760 flagPrintJunction = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
761 flagPrintAll = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
762 flagStrict = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
763 junctionCnt = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
764 bool hasMateReadIdSuffix = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
765
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
766 strcpy( prevChrome, "" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
767 flagRemove = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
768 flagPrintJunction = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
769 flank = 8 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
770 filterYS = 4 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
771
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
772 contradictedReads = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
773
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
774 // processing the argument list
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
775 for ( i = 1 ; i < argc ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
776 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
777 if ( !strcmp( argv[i], "-h" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
778 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
779 PrintHelp() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
780 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
781 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
782 else if ( !strcmp( argv[i], "-r" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
783 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
784 flagRemove = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
785 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
786 else if ( !strcmp( argv[i], "-j" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
787 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
788 strcpy( prevChrome, "" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
789 flagRemove = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
790 flagPrintJunction = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
791 flank = atoi( argv[i + 1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
792 if ( i + 2 < argc && !strcmp( argv[i+2], "-B" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
793 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
794 anchorBoth = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
795 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
796 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
797 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
798 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
799 else if ( !strcmp( argv[i], "-a" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
800 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
801 flagPrintAll = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
802 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
803 else if ( !strcmp( argv[i], "--strict" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
804 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
805 flagStrict = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
806 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
807 else if ( !strcmp( argv[i], "-y" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
808 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
809 filterYS = atoi( argv[i + 1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
810 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
811 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
812 else if ( !strcmp( argv[i], "--hasMateIdSuffix" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
813 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
814 hasMateReadIdSuffix = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
815 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
816 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
817 else if ( i > 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
818 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
819 printf( "Unknown option %s\n", argv[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
820 exit( 1 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
821 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
822 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
823 if ( argc == 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
824 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
825 PrintHelp() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
826 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
827 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
828
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
829 len = strlen( argv[1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
830 if ( argv[1][len-3] == 'b' || argv[1][len-3] == 'B' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
831 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
832 if ( !( fpsam = samopen( argv[1], "rb", 0 ) ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
833 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
834
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
835 if ( !fpsam->header )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
836 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
837 //samclose( fpsam ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
838 //fpsam = samopen( argv[1], "r", 0 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
839 //if ( !fpsam->header )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
840 //{
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
841 useSam = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
842 fp = fopen( argv[1], "r" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
843 //}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
844 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
845 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
846 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
847 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
848 useSam = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
849 fp = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
850 if ( !strcmp( argv[1], "-" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
851 fp = stdin ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
852 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
853 fp = fopen( argv[1], "r" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
854 if ( fp == NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
855 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
856 printf( "Could not open file %s\n", argv[1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
857 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
858 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
859 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
860
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
861 while ( 1 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
862 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
863 int flag = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
864 if ( useSam )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
865 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
866 if ( b )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
867 bam_destroy1( b ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
868 b = bam_init1() ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
869 if ( samread( fpsam, b ) <= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
870 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
871 if ( b->core.tid >= 0 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
872 strcpy( col[2], fpsam->header->target_name[b->core.tid] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
873 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
874 strcpy( col[2], "-1" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
875 cigar2string( &(b->core), bam1_cigar( b ), col[5] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
876 strcpy( col[0], bam1_qname( b ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
877 flag = b->core.flag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
878 if ( bam_aux_get( b, "NH" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
879 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
880 /*if ( bam_aux2i( bam_aux_get(b, "NH" ) ) >= 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
881 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
882 secondary = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
883 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
884 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
885 secondary = false ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
886
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
887 NH = bam_aux2i( bam_aux_get( b, "NH" ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
888 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
889 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
890 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
891 //secondary = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
892 NH = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
893 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
894
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
895 if ( bam_aux_get( b, "NM" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
896 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
897 editDistance = bam_aux2i( bam_aux_get( b, "NM" ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
898 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
899 else if ( bam_aux_get( b, "nM" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
900 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
901 editDistance = bam_aux2i( bam_aux_get( b, "nM" ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
902 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
903 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
904 editDistance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
905
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
906 mateStart = b->core.mpos + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
907 if ( b->core.mtid == b->core.tid )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
908 col[6][0] = '=' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
909 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
910 col[6][0] = '*' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
911
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
912 if ( b->core.l_qseq < 20 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
913 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
914
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
915 for ( i = 0 ; i < b->core.l_qseq ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
916 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
917 int bit = bam1_seqi( bam1_seq( b ), i ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
918 switch ( bit )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
919 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
920 case 1: col[9][i] = 'A' ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
921 case 2: col[9][i] = 'C' ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
922 case 4: col[9][i] = 'G' ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
923 case 8: col[9][i] = 'T' ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
924 case 15: col[9][i] = 'N' ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
925 default: col[9][i] = 'A' ; break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
926 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
927 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
928 col[9][i] = '\0' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
929
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
930 /*if ( flag & 0x100 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
931 secondary = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
932 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
933 secondary = false ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
934 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
935 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
936 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
937 char *p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
938 if ( !fgets( line, LINE_SIZE, fp ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
939 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
940 if ( line[0] == '\0' || line[0] == '@' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
941 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
942 sscanf( line, "%s%s%s%s%s%s%s%s%s%s%s", col, col + 1, col + 2, col + 3, col + 4,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
943 col + 5, col + 6, col + 7, col + 8, col + 9, col + 10 ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
944
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
945 flag = atoi( col[1] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
946 if ( p = strstr( line, "NH" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
947 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
948 int k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
949 p += 5 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
950 while ( *p != ' ' && *p != '\t' && *p != '\0')
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
951 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
952 k = k * 10 + *p - '0' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
953 ++p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
954 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
955 /*if ( k >= 2 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
956 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
957 secondary = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
958 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
959 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
960 secondary = false ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
961 NH = k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
962 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
963 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
964 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
965 //secondary = false ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
966 NH = 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
967 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
968 if ( ( p = strstr( line, "NM" ) ) || ( p = strstr( line, "nM" ) ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
969 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
970 int k = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
971 p += 5 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
972 while ( *p != ' ' && *p != '\t' && *p != '\0')
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
973 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
974 k = k * 10 + *p - '0' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
975 ++p ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
976 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
977 editDistance = k ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
978 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
979 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
980 editDistance = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
981
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
982 mateStart = atoi( col[7] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
983 /*if ( flag & 0x100 )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
984 secondary = true ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
985 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
986 secondary = false ;*/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
987
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
988 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
989 samFlag = flag ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
990 for ( i = 0 ; col[5][i] ; ++i )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
991 if ( col[5][i] == 'N' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
992 break ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
993
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
994 if ( !col[5][i] )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
995 continue ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
996
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
997 // remove .1, .2 or /1, /2 suffix
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
998 if ( hasMateReadIdSuffix )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
999 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1000 char *s = col[0] ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1001 int len = strlen( s ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1002 if ( len >= 2 && ( s[len - 1] == '1' || s[len - 1] == '2' )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1003 && ( s[len - 2] == '.' || s[len - 2] == '/' ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1004 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1005 s[len - 2] = '\0' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1006 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1007 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1008
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1009 if ( useSam )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1010 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1011 if ( bam_aux_get( b, "XS" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1012 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1013 strand = bam_aux2A( bam_aux_get( b, "XS" ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1014 if ( bam_aux_get( b, "YS" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1015 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1016 noncanonStrandInfo = bam_aux2i( bam_aux_get( b, "YS" ) ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1017 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1018 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1019 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1020 noncanonStrandInfo = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1021 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1022 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1023 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1024 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1025 strand = '?' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1026 noncanonStrandInfo = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1027 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1028 startLocation = b->core.pos + 1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1029 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1030 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1031 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1032 if ( strstr( line, "XS:A:-" ) ) // on negative strand
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1033 strand = '-' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1034 else if ( strstr( line, "XS:A:+" ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1035 strand = '+' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1036 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1037 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1038 strand = '?' ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1039 char *p = strstr( line, "YS:i:" ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1040 if ( p != NULL )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1041 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1042 p += 5 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1043 noncanonStrandInfo = atoi( p ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1044 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1045 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1046 noncanonStrandInfo = -1 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1047 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1048 startLocation = atoi( col[3] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1049 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1050
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1051 // Found the junctions from the read.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1052 if ( strcmp( prevChrome, col[2] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1053 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1054 if ( flagPrintJunction )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1055 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1056 // Print the remaining elements in the queue
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1057 i = qHead ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1058 while ( i != qTail )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1059 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1060 PrintJunction( prevChrome, junctionQueue[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1061 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1062 if ( i >= QUEUE_SIZE )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1063 i = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1064 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1065 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1066
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1067 // new chromosome
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1068 ClearReadTree( contradictedReads ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1069 contradictedReads = NULL ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1070 qHead = qTail = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1071 strcpy( prevChrome, col[2] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1072 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1073
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1074 if ( flagRemove )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1075 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1076 if ( CompareJunctions( startLocation, col[5] ) )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1077 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1078 // Test whether this read has new junctions
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1079 //++junctionCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1080 if ( !flagPrintJunction )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1081 printf( "%s", line ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1082 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1083 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1084 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1085 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1086 ++junctionCnt ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1087 printf( "%s", line ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1088 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1089 //printf( "hi2 %s\n", col[0] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1090 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1091
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1092 if ( flagPrintJunction )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1093 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1094 // Print the remaining elements in the queue
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1095 i = qHead ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1096 while ( i != qTail )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1097 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1098 PrintJunction( prevChrome, junctionQueue[i] ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1099 ++i ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1100 if ( i >= QUEUE_SIZE )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1101 i = 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1102 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1103 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1104
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1105 //fprintf( stderr, "The number of junctions: %d\n", junctionCnt ) ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1106 return 0 ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1107 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1108