0
|
1 #include <stdio.h>
|
|
2 #include <stdlib.h>
|
|
3 #include <string.h>
|
|
4 #include <ctype.h>
|
|
5 #include <pthread.h>
|
|
6 #include <inttypes.h>
|
|
7 #include <math.h>
|
|
8 #include <float.h>
|
|
9 #include "structs.h"
|
|
10 #include "alignmentFunctions.h"
|
|
11 #include "commonFunctions.h"
|
|
12 #define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
|
13 #define MIN(x, y) (((x) <= (y)) ? (x) : (y))
|
|
14
|
|
15 int64_t compare_letters(unsigned char a, unsigned char b){
|
|
16 if(a != (unsigned char) 'N' && a != (unsigned char) '>') return (a == b) ? POINT : -POINT;
|
|
17 return -POINT;
|
|
18 }
|
|
19
|
|
20 llpos * getNewLocationllpos(Mempool_l * mp, uint64_t * n_pools_used){
|
|
21
|
|
22 if(mp[*n_pools_used].current == POOL_SIZE){
|
|
23 *n_pools_used += 1;
|
|
24 if(*n_pools_used == MAX_MEM_POOLS) terror("Reached max pools");
|
|
25 init_mem_pool_llpos(&mp[*n_pools_used]);
|
|
26
|
|
27 }
|
|
28
|
|
29 llpos * new_pos = mp[*n_pools_used].base + mp[*n_pools_used].current;
|
|
30 mp[*n_pools_used].current++;
|
|
31
|
|
32
|
|
33 return new_pos;
|
|
34 }
|
|
35
|
|
36 void init_mem_pool_llpos(Mempool_l * mp){
|
|
37 mp->base = (llpos *) calloc(POOL_SIZE, sizeof(llpos));
|
|
38 if(mp->base == NULL) terror("Could not request memory pool");
|
|
39 mp->current = 0;
|
|
40 }
|
|
41
|
|
42
|
|
43 void * load_input(void * a){
|
|
44
|
|
45 LoadingDBArgs * ldbargs = (LoadingDBArgs *) a;
|
|
46
|
|
47 // Requires
|
|
48 /*
|
|
49 char * temp_seq_buffer;
|
|
50 SeqInfo * data_database; 1 per thread
|
|
51 uint64_t t_len;
|
|
52 uint64_t word_size;
|
|
53 uint64_t read_from;
|
|
54 uint64_t read_to;
|
|
55 char thread_id;
|
|
56 */
|
|
57
|
|
58 uint64_t c_pos;
|
|
59
|
|
60 unsigned char curr_kmer[custom_kmer];
|
|
61 unsigned char aux_kmer[custom_kmer+1];
|
|
62 curr_kmer[0] = '\0';
|
|
63 uint64_t word_size = 0, pos_in_database = 0;
|
|
64 unsigned char char_converter[91];
|
|
65 uint64_t curr_seq = 0;
|
|
66 char_converter[(unsigned char)'A'] = 0;
|
|
67 char_converter[(unsigned char)'C'] = 1;
|
|
68 char_converter[(unsigned char)'G'] = 2;
|
|
69 char_converter[(unsigned char)'T'] = 3;
|
|
70 //llpos * aux;
|
|
71 AVLTree * pointer;
|
|
72
|
|
73 char c;
|
|
74
|
|
75 /*
|
|
76 if(ldbargs->thread_id == 'A'){
|
|
77 printf("read to is: %"PRIu64"\n", ldbargs->read_to);
|
|
78 printf("make sure: %c %c %c\n", ldbargs->temp_seq_buffer[ldbargs->read_to-1], ldbargs->temp_seq_buffer[ldbargs->read_to], ldbargs->temp_seq_buffer[ldbargs->read_to+1]);
|
|
79
|
|
80 uint64_t z = ldbargs->read_to-1;
|
|
81 while(ldbargs->temp_seq_buffer[z] != '>'){
|
|
82 printf("%c", ldbargs->temp_seq_buffer[z]);
|
|
83 z--;
|
|
84 }
|
|
85 getchar();
|
|
86 }
|
|
87 if(ldbargs->thread_id == 'C'){
|
|
88 printf("HELLOOOOOOO im going from %"PRIu64"\n", ldbargs->read_from);
|
|
89 }
|
|
90 */
|
|
91
|
|
92 c_pos = ldbargs->read_from;
|
|
93 while(ldbargs->temp_seq_buffer[c_pos] != '>') ++c_pos;
|
|
94 ldbargs->read_from = c_pos;
|
|
95 c_pos = ldbargs->read_to;
|
|
96 while(c_pos < ldbargs->t_len && ldbargs->temp_seq_buffer[c_pos] != '>') ++c_pos;
|
|
97 ldbargs->read_to = c_pos;
|
|
98
|
|
99 c_pos = ldbargs->read_from;
|
|
100 c = ldbargs->temp_seq_buffer[c_pos];
|
|
101
|
|
102
|
|
103 //printf("thread going from %"PRIu64" to %"PRIu64"\n", ldbargs->read_from, ldbargs->read_to);
|
|
104
|
|
105
|
|
106 while(c_pos < ldbargs->read_to){
|
|
107
|
|
108
|
|
109 if(c == '>'){
|
|
110
|
|
111 //if(ldbargs->thread_id == 'G') printf("putting in %"PRIu64" @ %"PRIu64"\n", curr_seq, c_pos);
|
|
112 ldbargs->data_database->start_pos[curr_seq] = pos_in_database; ++curr_seq;
|
|
113
|
|
114 // REalloc sequences and sequence index
|
|
115 if(pos_in_database == READBUF*ldbargs->n_allocs){
|
|
116 ldbargs->n_allocs++; ldbargs->data_database->sequences = (unsigned char *) realloc(ldbargs->data_database->sequences, READBUF*ldbargs->n_allocs*sizeof(unsigned char));
|
|
117 if(ldbargs->data_database->sequences == NULL) terror("Could not reallocate temporary database");
|
|
118 }
|
|
119
|
|
120 if(curr_seq == INITSEQS*ldbargs->n_allocs){
|
|
121 ldbargs->n_allocs++; ldbargs->data_database->start_pos = (uint64_t *) realloc(ldbargs->data_database->start_pos, INITSEQS*ldbargs->n_allocs*sizeof(uint64_t));
|
|
122 }
|
|
123
|
|
124
|
|
125
|
|
126 while(c != '\n'){ c = ldbargs->temp_seq_buffer[c_pos]; ++c_pos; } //Skip ID
|
|
127
|
|
128 while(c != '>' && c_pos < ldbargs->read_to){ //Until next id
|
|
129
|
|
130 //if(ldbargs->thread_id == 'A') printf("!!!!!!%"PRIu64" from:%"PRIu64", to %"PRIu64"\n", c_pos, ldbargs->read_from, ldbargs->read_to);
|
|
131 c = ldbargs->temp_seq_buffer[c_pos]; ++c_pos;
|
|
132 c = toupper(c);
|
|
133 if(c == 'A' || c == 'C' || c == 'G' || c == 'T'){
|
|
134 curr_kmer[word_size] = (unsigned char) c;
|
|
135 if(word_size < custom_kmer) ++word_size;
|
|
136
|
|
137 ldbargs->data_database->sequences[pos_in_database] = (unsigned char) c; ++pos_in_database;
|
|
138 // REalloc sequences and sequence index
|
|
139 if(pos_in_database == READBUF*ldbargs->n_allocs){
|
|
140 ldbargs->n_allocs++; ldbargs->data_database->sequences = (unsigned char *) realloc(ldbargs->data_database->sequences, READBUF*ldbargs->n_allocs*sizeof(unsigned char));
|
|
141 if(ldbargs->data_database->sequences == NULL) terror("Could not reallocate temporary database");
|
|
142 }
|
|
143
|
|
144 if(curr_seq == INITSEQS*ldbargs->n_allocs){
|
|
145 ldbargs->n_allocs++; ldbargs->data_database->start_pos = (uint64_t *) realloc(ldbargs->data_database->start_pos, INITSEQS*ldbargs->n_allocs*sizeof(uint64_t));
|
|
146 }
|
|
147 }else{ //It can be anything (including N, Y, X ...)
|
|
148
|
|
149 if(c != '\n' && c != '\r' && c != '>'){
|
|
150 word_size = 0;
|
|
151 ldbargs->data_database->sequences[pos_in_database] = (unsigned char) 'N'; ++pos_in_database; //Convert to N
|
|
152 // REalloc sequences and sequence index
|
|
153 if(pos_in_database == READBUF*ldbargs->n_allocs){
|
|
154 ldbargs->n_allocs++; ldbargs->data_database->sequences = (unsigned char *) realloc(ldbargs->data_database->sequences, READBUF*ldbargs->n_allocs*sizeof(unsigned char));
|
|
155 if(ldbargs->data_database->sequences == NULL) terror("Could not reallocate temporary database");
|
|
156 }
|
|
157
|
|
158 if(curr_seq == INITSEQS*ldbargs->n_allocs){
|
|
159 ldbargs->n_allocs++; ldbargs->data_database->start_pos = (uint64_t *) realloc(ldbargs->data_database->start_pos, INITSEQS*ldbargs->n_allocs*sizeof(uint64_t));
|
|
160 }
|
|
161 }
|
|
162 }
|
|
163 if(word_size == custom_kmer){
|
|
164 //write to hash table
|
|
165
|
|
166
|
|
167 pointer = &ldbargs->ct->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]]
|
|
168 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]]
|
|
169 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]]
|
|
170 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]];
|
|
171
|
|
172
|
|
173 pointer = insert_AVLTree(pointer, hashOfWord(&curr_kmer[FIXED_K], custom_kmer-FIXED_K), ldbargs->mp_AVL, &ldbargs->n_pools_used_AVL, pos_in_database, ldbargs->mp, &ldbargs->n_pools_used, curr_seq-1);
|
|
174
|
|
175
|
|
176
|
|
177 // CURRENTLY USING OVERLAPPING
|
|
178
|
|
179 memcpy(aux_kmer, &curr_kmer[1], custom_kmer-1);
|
|
180 memcpy(curr_kmer, aux_kmer, custom_kmer-1);
|
|
181 word_size--;
|
|
182
|
|
183 // For NON OVERLAPPING ENABLE THIS
|
|
184 //word_size = 0;
|
|
185 }
|
|
186
|
|
187 }
|
|
188 word_size = 0;
|
|
189 }else{
|
|
190 c = ldbargs->temp_seq_buffer[c_pos]; ++c_pos;
|
|
191 }
|
|
192
|
|
193 }
|
|
194 /*
|
|
195 if(ldbargs->thread_id == 'T'){
|
|
196 uint64_t j;
|
|
197 for(j=0; j < curr_seq-1; j++){
|
|
198 printf("%"PRIu64" - %"PRIu64"\n", ldbargs->data_database->start_pos[j], ldbargs->data_database->start_pos[j+1]);
|
|
199 }
|
|
200 }
|
|
201 */
|
|
202 /*
|
|
203 if(ldbargs->thread_id == 'A'){
|
|
204 printf("\nAT %"PRIu64", and pos_database = %"PRIu64"\n", ldbargs->data_database->start_pos[curr_seq-1], pos_in_database);
|
|
205 uint64_t z = pos_in_database;
|
|
206 while(z > ldbargs->data_database->start_pos[curr_seq-1]){
|
|
207 printf("%c", ldbargs->data_database->sequences[z]);
|
|
208 z--;
|
|
209 }
|
|
210
|
|
211 getchar();
|
|
212 }
|
|
213 */
|
|
214
|
|
215 ldbargs->data_database->start_pos[curr_seq] = pos_in_database;
|
|
216 ldbargs->data_database->total_len = pos_in_database;
|
|
217 ldbargs->contained_reads = curr_seq;
|
|
218 ldbargs->data_database->n_seqs = curr_seq;
|
|
219 ldbargs->base_coordinates = pos_in_database;
|
|
220 return NULL;
|
|
221 }
|
|
222
|
|
223
|
|
224 void * computeAlignmentsByThread(void * a){
|
|
225
|
|
226 /*
|
|
227 typedef struct {
|
|
228 SeqInfo * database; //Database sequence and lengths
|
|
229 SeqInfo * query; //Query sequence and lengths
|
|
230 uint64_t from; //Starting READ to compute alignments from
|
|
231 uint64_t to; //End READ to compute alignments from
|
|
232 Container * container; //Container to hold the multidimensional array
|
|
233 uint64_t accepted_query_reads; //Number of reads that have a fragment with evalue less than specified
|
|
234 long double min_e_value; //Minimum evalue to accept read
|
|
235 } HashTableArgs;
|
|
236 */
|
|
237
|
|
238 HashTableArgs * hta = (HashTableArgs *) a;
|
|
239 Queue * my_current_task = NULL;
|
|
240
|
|
241 unsigned char char_converter[91];
|
|
242 char_converter[(unsigned char)'A'] = 0;
|
|
243 char_converter[(unsigned char)'C'] = 1;
|
|
244 char_converter[(unsigned char)'G'] = 2;
|
|
245 char_converter[(unsigned char)'T'] = 3;
|
|
246 Quickfrag qf;
|
|
247 int64_t * cell_path_y = (int64_t *) malloc(MAX_READ_SIZE*sizeof(int64_t));
|
|
248 if(cell_path_y == NULL) terror("Could not allocate cell paths");
|
|
249
|
|
250
|
|
251 Point p0, p1, p2, p3; //Points for NW anchored
|
|
252 p0.x = 0; p0.y = 0;
|
|
253
|
|
254 AVLContainer * ptr_table_redirect[4];
|
|
255 ptr_table_redirect[0] = hta->container_a;
|
|
256 ptr_table_redirect[1] = hta->container_b;
|
|
257 ptr_table_redirect[2] = hta->container_c;
|
|
258 ptr_table_redirect[3] = hta->container_d;
|
|
259 unsigned char current_table = 0;
|
|
260
|
|
261
|
|
262
|
|
263 //To keep track of which reads are we reading
|
|
264 uint64_t curr_read, curr_db_seq, xlen, ylen;
|
|
265 uint64_t crrSeqL, pos_of_hit = 0xFFFFFFFFFFFFFFFF;
|
|
266
|
|
267 //Reading from buffer
|
|
268 char c;
|
|
269 unsigned char curr_kmer[custom_kmer], b_aux[custom_kmer];
|
|
270 llpos * aux;
|
|
271 AVLTree * pointer;
|
|
272
|
|
273 // For NW-alignment
|
|
274 int NWaligned;
|
|
275 uint64_t n_hits, alignments_tried;
|
|
276
|
|
277 BasicAlignment ba; //The resulting alignment from the NW
|
|
278 uint64_t curr_pos = 0; //Reading-head position
|
|
279 uint64_t up_to = 0;
|
|
280
|
|
281 int64_t last_diagonal = INT64_MIN; // Diagonal to skip repeated hits
|
|
282 unsigned char already_aligned = FALSE; // To not count more times the same read
|
|
283
|
|
284
|
|
285
|
|
286
|
|
287 //Get next operation in queue
|
|
288 while(NULL != ( my_current_task = get_task_from_queue(hta->queue_head, hta->lock))){
|
|
289 //Initialize all variables
|
|
290
|
|
291 qf.x_start = qf.y_start = qf.t_len = 0;
|
|
292 qf.e_value = LDBL_MAX;
|
|
293 last_diagonal = INT64_MIN;
|
|
294
|
|
295 //Starting from
|
|
296 curr_read = my_current_task->r1;
|
|
297 crrSeqL = 0; pos_of_hit = 0;
|
|
298
|
|
299 curr_kmer[0] = '\0'; b_aux[0] = '\0';
|
|
300 aux = NULL;
|
|
301 pointer = NULL;
|
|
302
|
|
303 NWaligned = 0;
|
|
304 n_hits = 0;
|
|
305 alignments_tried = 0;
|
|
306
|
|
307 ba.identities = 0; ba.length = 0; ba.igaps = 0xFFFFFFFFFFFFFFFF; ba.egaps = 0xFFFFFFFFFFFFFFFF;
|
|
308 memset(&hta->markers[0], 0, hta->database->n_seqs); // Reset used tags
|
|
309 already_aligned = FALSE;
|
|
310
|
|
311 //Set current header position at the position of the read start (the ">")
|
|
312 curr_pos = hta->query->start_pos[curr_read]; //Skip the ">"
|
|
313 c = (char) hta->query->sequences[curr_pos];
|
|
314
|
|
315 //printf("Im doing from %"PRIu64" to %"PRIu64", nseqs=%"PRIu64"\n", my_current_task->r1, my_current_task->r2, hta->query->n_seqs);
|
|
316 //getchar();
|
|
317
|
|
318 while(curr_read < my_current_task->r2 && curr_pos < hta->query->total_len){
|
|
319
|
|
320
|
|
321
|
|
322 if(curr_read != hta->query->n_seqs) up_to = hta->query->start_pos[curr_read+1]-1; else up_to = hta->query->total_len;
|
|
323 //printf("Currrpos: %"PRIu64" up to: %"PRIu64" on read: %"PRIu64"\n", curr_pos, up_to, curr_read);
|
|
324
|
|
325 if (curr_pos == up_to) { // Comment, empty or quality (+) line
|
|
326 crrSeqL = 0; // Reset buffered sequence length
|
|
327 #ifdef VERBOSE
|
|
328 printf("Read: %"PRIu64" yielded (%d)\n", curr_read, NWaligned);
|
|
329 #endif
|
|
330 //if(NWaligned == 0){ printf("Read: %"PRIu64" yielded (%d)\n", curr_read, NWaligned);}
|
|
331 //printf("Read: %"PRIu64" yielded (%d)\n", curr_read, NWaligned); if(NWaligned == 0) getchar();
|
|
332 NWaligned = 0;
|
|
333 //fprintf(stdout, "Seq %"PRIu64" has %"PRIu64" hits and tried to align %"PRIu64" times\n", curr_read, n_hits, alignments_tried);
|
|
334 //fflush(stdout);
|
|
335 n_hits = 0;
|
|
336 already_aligned = FALSE;
|
|
337 alignments_tried = 0;
|
|
338 last_diagonal = INT64_MIN; // This is not perfect but if the diagonal reaches the value then we have an overflow anyway
|
|
339 qf.x_start = 0;
|
|
340 qf.t_len = 0;
|
|
341
|
|
342 //if(hta->full_comp == TRUE) memset(&hta->markers[my_current_task->r1], 0, my_current_task->r2 - my_current_task->r1 + 1); // Reset used tags
|
|
343 memset(&hta->markers[0], FALSE, hta->database->n_seqs); // Reset used tags
|
|
344
|
|
345 curr_read++;
|
|
346 //printf("On current read %"PRIu64"\n", curr_read);
|
|
347 continue;
|
|
348 }
|
|
349
|
|
350 if(c == 'A' || c == 'C' || c == 'T' || c == 'G'){
|
|
351 curr_kmer[crrSeqL] = (unsigned char) c;
|
|
352 crrSeqL++;
|
|
353 }else{
|
|
354 crrSeqL = 0;
|
|
355 }
|
|
356
|
|
357 if (crrSeqL >= custom_kmer) { // Full well formed sequence
|
|
358
|
|
359
|
|
360 //printf("comparing hit: %.11s\n", (char *)&curr_kmer[0]);
|
|
361 //getchar();
|
|
362
|
|
363 // Choose table
|
|
364 /*
|
|
365 if(curr_kmer[0] == (unsigned char) 'A'){
|
|
366 ptr_table_redirect = hta->container_A;
|
|
367 }else if(curr_kmer[0] == (unsigned char) 'C'){
|
|
368 ptr_table_redirect = hta->container_C;
|
|
369 }else if(curr_kmer[0] == (unsigned char) 'G'){
|
|
370 ptr_table_redirect = hta->container_G;
|
|
371 }else{
|
|
372 ptr_table_redirect = hta->container_T;
|
|
373 }
|
|
374 */
|
|
375
|
|
376 //fprintf(stdout, "%s\n", curr_kmer);
|
|
377 //fflush(stdout);
|
|
378 current_table = 0;
|
|
379 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]]
|
|
380 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]]
|
|
381 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]]
|
|
382 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]];
|
|
383
|
|
384 if(pointer == NULL){
|
|
385 ++current_table;
|
|
386 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]]
|
|
387 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]]
|
|
388 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]]
|
|
389 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]];
|
|
390 }
|
|
391 if(pointer == NULL){
|
|
392 ++current_table;
|
|
393 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]]
|
|
394 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]]
|
|
395 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]]
|
|
396 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]];
|
|
397 }
|
|
398 if(pointer == NULL){
|
|
399 ++current_table;
|
|
400 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]]
|
|
401 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]]
|
|
402 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]]
|
|
403 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]];
|
|
404 }
|
|
405
|
|
406 //While there are hits
|
|
407 //fprintf(stdout, "%p\n", aux);
|
|
408 //fflush(stdout);
|
|
409
|
|
410 uint64_t hash_forward = hashOfWord(&curr_kmer[FIXED_K], custom_kmer - FIXED_K);
|
|
411 AVLTree * search = find_AVLTree(pointer, hash_forward);
|
|
412 if(search != NULL) aux = search->next; else aux = NULL;
|
|
413
|
|
414
|
|
415 while(aux != NULL && ((hta->full_comp == FALSE && NWaligned == 0 && hta->markers[aux->s_id+ hta->contained_reads[current_table]] == 0) || (hta->full_comp && hta->markers[aux->s_id+ hta->contained_reads[current_table]] == 0))){
|
|
416
|
|
417 n_hits++;
|
|
418 //fprintf(stdout, "%p\n", aux);
|
|
419 //fflush(stdout);
|
|
420 // ADD OFFSET CUCOOOOOOOOOOOOOOOOOOOOOO!!!!!!!!!!!!!!!
|
|
421 //printf("my current table is %u\n", current_table);
|
|
422 //printf("check this woop: %"PRIu64" - %"PRIu64" - %"PRIu64" - %"PRIu64"\n", aux->s_id, hta->contained_reads[current_table], aux->pos, hta->base_coordinates[current_table]);
|
|
423 curr_db_seq = aux->s_id + hta->contained_reads[current_table];
|
|
424 pos_of_hit = aux->pos + hta->base_coordinates[current_table];
|
|
425 //printf("Pos of hit db: %"PRIu64", seq num %"PRIu64", contained reads: %"PRIu64", contained coord %"PRIu64"\n", pos_of_hit, curr_db_seq, hta->contained_reads[current_table], hta->base_coordinates[current_table]);
|
|
426 if(hta->hits != NULL){
|
|
427 hta->hits[curr_db_seq]++;
|
|
428 goto only_hits; // Count only hits and skip the rest
|
|
429 }
|
|
430
|
|
431 //fprintf(stdout, "Launching curr_read: %"PRIu64" @ %"PRIu64", vs curr_db_read: %"PRIu64" @ %"PRIu64": ", curr_read, curr_pos+1, curr_db_seq, pos_of_hit);
|
|
432 /*
|
|
433 if(curr_read == 534) fprintf(stdout, "Launching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": ", curr_read, curr_pos+1, curr_db_seq, pos_of_hit);
|
|
434 */
|
|
435 #ifdef VERBOSE
|
|
436 fprintf(stdout, "Launching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": ", curr_read, curr_pos+1, curr_db_seq, pos_of_hit);
|
|
437 #endif
|
|
438 int64_t curr_diagonal = (int64_t)(curr_pos+1) - (int64_t) pos_of_hit;
|
|
439
|
|
440
|
|
441
|
|
442 if( (last_diagonal != curr_diagonal && !(qf.x_start <= (pos_of_hit + custom_kmer) && pos_of_hit <= (qf.x_start + qf.t_len)))){
|
|
443
|
|
444 /*
|
|
445 if(curr_db_seq == hta->database->n_seqs-1){
|
|
446 xlen = hta->database->total_len - hta->database->start_pos[curr_db_seq];
|
|
447 }else{
|
|
448 xlen = hta->database->start_pos[curr_db_seq+1] - hta->database->start_pos[curr_db_seq];
|
|
449 }
|
|
450 if(curr_read == hta->query->n_seqs-1){
|
|
451 ylen = hta->query->total_len - hta->query->start_pos[curr_read];
|
|
452 }else{
|
|
453 ylen = hta->query->start_pos[curr_read+1] - hta->query->start_pos[curr_read];
|
|
454 }
|
|
455 */
|
|
456
|
|
457 /*if(curr_read == 35){
|
|
458 fprintf(stdout, "Launching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": \n", curr_read, curr_pos+1, curr_db_seq, pos_of_hit);
|
|
459 printf("what do you think will happen: %"PRIu64"\n", hta->database->start_pos[curr_db_seq]);
|
|
460 //fprintf(stdout, "lengths: x: %"PRIu64", y: %"PRIu64"\n", xlen, ylen);
|
|
461 getchar();
|
|
462 }*/
|
|
463
|
|
464
|
|
465 //printf("accepted because: \n");
|
|
466 /*if(curr_read % 100 == 0){
|
|
467 fprintf(stdout, "Launching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": \n", curr_read, curr_pos+1, curr_db_seq, pos_of_hit);
|
|
468 }
|
|
469 */
|
|
470 //printf("prev_diag: %"PRId64"- currdiag: %"PRId64"\n", last_diagonal, curr_diagonal);
|
|
471 //printf("\t covers to [%"PRIu64"+%"PRIu64"=%"PRIu64"] [%"PRIu64"] \n", qf.x_start, qf.t_len, qf.x_start+qf.t_len, pos_of_hit); getchar();
|
|
472 if(hta->markers[curr_db_seq] == FALSE){
|
|
473
|
|
474 //if(current_table > 0) getchar();
|
|
475 alignmentFromQuickHits(hta->database, hta->query, pos_of_hit, curr_pos+1, curr_read, curr_db_seq, &qf, hta->contained_reads[current_table], hta->base_coordinates[current_table]);
|
|
476 last_diagonal = curr_diagonal;
|
|
477 }else{
|
|
478 qf.e_value = 100000000;
|
|
479 }
|
|
480
|
|
481 //if(curr_read == 35) printf(" evalue: %Le from %"PRIu64", %"PRIu64" with l: %"PRIu64"\n", qf.e_value, qf.x_start, qf.y_start, qf.t_len);
|
|
482 }else{
|
|
483
|
|
484 /*if(curr_read == 35){
|
|
485 printf("rejected because: \n");
|
|
486 fprintf(stdout, "UNLaunching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": \n", curr_read, curr_pos+1, curr_db_seq, pos_of_hit);
|
|
487 printf("prev_diag: %"PRId64"- currdiag: %"PRId64"\n", last_diagonal, curr_diagonal);
|
|
488 printf("\t covers to [%"PRIu64"+%"PRIu64"=%"PRIu64"] [%"PRIu64"] \n", qf.x_start, qf.t_len, qf.x_start+qf.t_len, pos_of_hit); getchar();
|
|
489 }*/
|
|
490
|
|
491 qf.e_value = 100000000;
|
|
492 }
|
|
493
|
|
494 #ifdef VERBOSE
|
|
495 printf(" evalue: %Le %"PRIu64"\n", qf.e_value, qf.t_len);
|
|
496 #endif
|
|
497 //getchar();
|
|
498
|
|
499
|
|
500
|
|
501
|
|
502
|
|
503 //If e-value of current frag is good, then we compute a good gapped alignment
|
|
504 if(qf.e_value < hta->min_e_value /*&& xlen == 799 && ylen == 2497*/){
|
|
505 alignments_tried++;
|
|
506 ba.identities = ba.length = ba.igaps = ba.egaps = 0;
|
|
507 //Compute lengths of reads
|
|
508 if(curr_db_seq == hta->database->n_seqs-1){
|
|
509 xlen = hta->database->total_len - hta->database->start_pos[curr_db_seq];
|
|
510 }else{
|
|
511 xlen = hta->database->start_pos[curr_db_seq+1] - hta->database->start_pos[curr_db_seq];
|
|
512 //printf("!!!\n%"PRIu64", %"PRIu64" :: %"PRIu64"; its db->start_pos[curr_db_seq] db->start_pos[curr_db_seq+1] curr_db_seq\n", hta->database->start_pos[curr_db_seq], hta->database->start_pos[curr_db_seq+1], curr_db_seq);
|
|
513 }
|
|
514 if(curr_read == hta->query->n_seqs-1){
|
|
515 ylen = hta->query->total_len - hta->query->start_pos[curr_read];
|
|
516 }else{
|
|
517 ylen = hta->query->start_pos[curr_read+1] - hta->query->start_pos[curr_read];
|
|
518 }
|
|
519 //fprintf(stdout, "lengths: x: %"PRIu64", y: %"PRIu64"\n", xlen, ylen);
|
|
520 //Perform alignment plus backtracking
|
|
521 //void build_alignment(char * reconstruct_X, char * reconstruct_Y, uint64_t curr_db_seq, uint64_t curr_read, HashTableArgs * hta, char * my_x, char * my_y, struct cell ** table, struct cell * mc, char * writing_buffer_alignment, BasicAlignment * ba, uint64_t xlen, uint64_t ylen)
|
|
522 if(xlen > MAX_READ_SIZE || ylen > MAX_READ_SIZE){ printf("(%"PRIu64",%"PRIu64")\n", xlen, ylen); terror("Read size reached for gapped alignment."); }
|
|
523 //fprintf(stdout, "R0 %"PRIu64", %"PRIu64"\n", curr_db_seq, curr_read);
|
|
524
|
|
525
|
|
526 #ifdef VERBOSE
|
|
527 fprintf(stdout, "qfxs %"PRIu64", dbs %"PRIu64", qfys %"PRIu64" qys %"PRIu64"\n", qf.x_start, hta->database->start_pos[curr_db_seq], qf.y_start, hta->query->start_pos[curr_read]);
|
|
528 #endif
|
|
529
|
|
530 //fprintf(stdout, "dbFragxs %"PRIu64", dbs %"PRIu64", rFragys %"PRIu64" rys %"PRIu64"\n", qf.x_start, hta->database->start_pos[curr_db_seq], qf.y_start, hta->query->start_pos[curr_read]);
|
|
531 /*
|
|
532 printf("at table: %u\n", current_table);
|
|
533 fprintf(stdout, "Launching curr_read: %"PRIu64" @ %"PRIu64", vs curr_db_read: %"PRIu64" @ %"PRIu64": ", curr_read, curr_pos+1, curr_db_seq, pos_of_hit);
|
|
534 fprintf(stdout, "Launching NW %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": \n", curr_read, curr_pos+1, curr_db_seq, pos_of_hit);
|
|
535 printf("have len: %"PRIu64", %"PRIu64"\n", xlen, ylen);
|
|
536 printf("Quickfrag (xs, ys): qf::%"PRIu64", %"PRIu64", tlen:%"PRIu64"\n", qf.x_start, qf.y_start, qf.t_len);
|
|
537 printf("Yea, but start and end of read in db is: %"PRIu64" - %"PRIu64"\n", hta->database->start_pos[curr_db_seq], hta->database->start_pos[curr_db_seq+1]);
|
|
538 */
|
|
539
|
|
540 p1.x = qf.x_start - hta->database->start_pos[curr_db_seq];
|
|
541 //p1.y = qf.y_start - hta->query->start_pos[curr_read];
|
|
542 p1.y = qf.y_start - (hta->query->start_pos[curr_read] -1);
|
|
543 p2.x = p1.x + qf.t_len;
|
|
544 p2.y = p1.y + qf.t_len;
|
|
545 p3.x = xlen;
|
|
546 p3.y = ylen;
|
|
547
|
|
548 #ifdef VERBOSE
|
|
549 fprintf(stdout, "p0 (%"PRIu64", %"PRIu64") p1 (%"PRIu64", %"PRIu64") p2 (%"PRIu64", %"PRIu64") p3 (%"PRIu64", %"PRIu64")\n", p0.x, p0.y, p1.x, p1.y, p2.x, p2.y, p3.x, p3.y);
|
|
550 #endif
|
|
551 //fprintf(stdout, "p0 (%"PRIu64", %"PRIu64") p1 (%"PRIu64", %"PRIu64") p2 (%"PRIu64", %"PRIu64") p3 (%"PRIu64", %"PRIu64")\n", p0.x, p0.y, p1.x, p1.y, p2.x, p2.y, p3.x, p3.y);
|
|
552
|
|
553 calculate_y_cell_path(p0, p1, p2, p3, cell_path_y);
|
|
554
|
|
555 // REMOVE
|
|
556 /*
|
|
557 uint64_t r1,r2;
|
|
558 for(r1=0;r1<MAX_WINDOW_SIZE;r1++){
|
|
559 for(r2=0;r2<MAX_WINDOW_SIZE;r2++){
|
|
560 hta->table[r1][r2].score = INT64_MIN;
|
|
561 }
|
|
562 }
|
|
563 */
|
|
564
|
|
565 build_alignment(hta->reconstruct_X, hta->reconstruct_Y, curr_db_seq, curr_read, hta, hta->my_x, hta->my_y, hta->table, hta->mc, hta->writing_buffer_alignment, &ba, xlen, ylen, cell_path_y, &hta->window);
|
|
566
|
|
567 // Set the read to already aligned so that it does not repeat
|
|
568 hta->markers[curr_db_seq] = 1;
|
|
569
|
|
570 #ifdef VERBOSE
|
|
571 printf("len 1 %"PRIu64", len 2 %"PRIu64"\n", ba.length, ylen);
|
|
572 printf("ident %"PRIu64"\n", ba.identities);
|
|
573 #endif
|
|
574
|
|
575 //If is good
|
|
576 if(((long double)(ba.length-(ba.igaps+ba.egaps))/ylen) >= hta->min_coverage && ((long double)ba.identities/(ba.length-(ba.igaps+ba.egaps))) >= hta->min_identity){
|
|
577 if(already_aligned == FALSE){
|
|
578 hta->accepted_query_reads++;
|
|
579 already_aligned = TRUE;
|
|
580 //printf("accepted: %"PRIu64"\n", hta->accepted_query_reads);
|
|
581 }
|
|
582
|
|
583 hta->markers[curr_db_seq] = 1;
|
|
584 if(hta->out != NULL){
|
|
585 //printf("Last was: (%"PRIu64", %"PRIu64")\n", curr_read, curr_db_seq);
|
|
586 fprintf(hta->out, "(%"PRIu64", %"PRIu64") : %d%% %d%% %"PRIu64"\n $$$$$$$ \n", curr_read, curr_db_seq, MIN(100,(int)(100*(ba.length-(ba.igaps+ba.egaps))/ylen)), MIN(100,(int)((long double)100*ba.identities/(ba.length-(ba.igaps+ba.egaps)))), ylen);
|
|
587 fprintf(hta->out, "%s", hta->writing_buffer_alignment);
|
|
588 //fprintf(stdout, "(%"PRIu64", %"PRIu64") : %d%% %d%% %"PRIu64"\n $$$$$$$ \n", curr_read, curr_db_seq, MIN(100,(int)(100*ba.identities/ba.length)), MIN(100,(int)(100*ba.length/ylen)), ylen);
|
|
589 //fprintf(stdout, "%s", hta->writing_buffer_alignment);
|
|
590 }
|
|
591 NWaligned = 1;
|
|
592 }/*else{
|
|
593 printf("what: ");
|
|
594 printf("len x %"PRIu64", len y %"PRIu64"\n", xlen, ylen);
|
|
595 printf("ident %"PRIu64" len %"PRIu64"\n", ba.identities, ba.length); getchar();
|
|
596 }*/
|
|
597
|
|
598 }
|
|
599
|
|
600 //strncpy(get_from_db, &hta->database->sequences[qf.x_start], qf.t_len);
|
|
601 //strncpy(get_from_query, &hta->query->sequences[qf.y_start], qf.t_len);
|
|
602 //fprintf(hta->out, "%s\n%s\n%Le\t%d\n-------------------\n", get_from_db, get_from_query, qf.e_value, (int)(100*qf.coverage));
|
|
603 //fprintf(hta->out, "%"PRIu64", %"PRIu64", %"PRIu64"\n", qf.x_start, qf.y_start, qf.t_len);
|
|
604
|
|
605 //printf("Hit comes from %"PRIu64", %"PRIu64"\n", pos_of_hit, curr_pos);
|
|
606 only_hits:
|
|
607 aux = aux->next;
|
|
608 while(aux == NULL && current_table < FIXED_LOADING_THREADS-1){
|
|
609 ++current_table;
|
|
610 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]]
|
|
611 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]]
|
|
612 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]]
|
|
613 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]];
|
|
614 hash_forward = hashOfWord(&curr_kmer[FIXED_K], custom_kmer - FIXED_K);
|
|
615 search = find_AVLTree(pointer, hash_forward);
|
|
616 if(search != NULL) aux = search->next; else aux = NULL;
|
|
617 }
|
|
618 //fprintf(stdout, "%p\n", aux);
|
|
619 //fflush(stdout);
|
|
620 }
|
|
621 //printf("SWITCHED\n");
|
|
622
|
|
623 if(NWaligned == 1 && hta->full_comp == FALSE){
|
|
624 if(curr_read < hta->query->n_seqs) curr_pos = hta->query->start_pos[curr_read+1]-2;
|
|
625 }else{
|
|
626 memcpy(b_aux, curr_kmer, custom_kmer);
|
|
627 memcpy(curr_kmer, &b_aux[1], custom_kmer-1);
|
|
628 crrSeqL -= 1;
|
|
629 }
|
|
630 }
|
|
631 //printf("current pos: %"PRIu64"\n", curr_pos);
|
|
632 curr_pos++;
|
|
633 if(curr_pos < hta->query->total_len) c = (char) hta->query->sequences[curr_pos];
|
|
634
|
|
635
|
|
636
|
|
637 }
|
|
638
|
|
639
|
|
640
|
|
641
|
|
642
|
|
643 }
|
|
644
|
|
645
|
|
646
|
|
647
|
|
648
|
|
649 //fprintf(stdout, "Going from %"PRIu64" to %"PRIu64"\n", hta->from, hta->to);
|
|
650 //fflush(stdout);
|
|
651
|
|
652 free(cell_path_y);
|
|
653
|
|
654 return NULL;
|
|
655
|
|
656 }
|
|
657
|
|
658 void build_alignment(char * reconstruct_X, char * reconstruct_Y, uint64_t curr_db_seq, uint64_t curr_read, HashTableArgs * hta, unsigned char * my_x, unsigned char * my_y, struct cell ** table, struct positioned_cell * mc, char * writing_buffer_alignment, BasicAlignment * ba, uint64_t xlen, uint64_t ylen, int64_t * cell_path_y, long double * window){
|
|
659
|
|
660
|
|
661 //Do some printing of alignments here
|
|
662 uint64_t maximum_len, i, j, curr_pos_buffer, curr_window_size;
|
|
663
|
|
664 maximum_len = 2*MAX(xlen,ylen);
|
|
665 memcpy(my_x, &hta->database->sequences[hta->database->start_pos[curr_db_seq]], xlen);
|
|
666 memcpy(my_y, &hta->query->sequences[hta->query->start_pos[curr_read]], ylen);
|
|
667
|
|
668 struct best_cell bc = NW(my_x, 0, xlen, my_y, 0, ylen, (int64_t) hta->igap, (int64_t) hta->egap, table, mc, 0, cell_path_y, window, &curr_window_size);
|
|
669 backtrackingNW(my_x, 0, xlen, my_y, 0, ylen, table, reconstruct_X, reconstruct_Y, &bc, &i, &j, ba, cell_path_y, curr_window_size);
|
|
670 uint64_t offset = 0, before_i = 0, before_j = 0;
|
|
671 i++; j++;
|
|
672
|
|
673 #ifdef VERBOSE
|
|
674 uint64_t z=0;
|
|
675 for(z=0;z<maximum_len;z++) printf("%c", reconstruct_X[z]);
|
|
676 printf("\n");
|
|
677 for(z=0;z<maximum_len;z++) printf("%c", reconstruct_Y[z]);
|
|
678 #endif
|
|
679
|
|
680 curr_pos_buffer = 0;
|
|
681 while(i <= maximum_len && j <= maximum_len){
|
|
682 offset = 0;
|
|
683 before_i = i;
|
|
684 writing_buffer_alignment[curr_pos_buffer++] = 'D';
|
|
685 writing_buffer_alignment[curr_pos_buffer++] = '\t';
|
|
686 while(offset < ALIGN_LEN && i <= maximum_len){
|
|
687 //fprintf(stdout, "%c", reconstruct_X[i]);
|
|
688 writing_buffer_alignment[curr_pos_buffer++] = (char) reconstruct_X[i];
|
|
689 i++;
|
|
690 offset++;
|
|
691 }
|
|
692 //fprintf(out, "\n");
|
|
693
|
|
694 writing_buffer_alignment[curr_pos_buffer++] = '\n';
|
|
695 offset = 0;
|
|
696 before_j = j;
|
|
697
|
|
698 //fprintf(stdout, "\n");
|
|
699
|
|
700 writing_buffer_alignment[curr_pos_buffer++] = 'Q';
|
|
701 writing_buffer_alignment[curr_pos_buffer++] = '\t';
|
|
702
|
|
703 while(offset < ALIGN_LEN && j <= maximum_len){
|
|
704 //fprintf(stdout, "%c", reconstruct_Y[j]);
|
|
705 writing_buffer_alignment[curr_pos_buffer++] = (char) reconstruct_Y[j];
|
|
706 j++;
|
|
707 offset++;
|
|
708 }
|
|
709 //fprintf(out, "\n");
|
|
710 writing_buffer_alignment[curr_pos_buffer++] = '\n';
|
|
711 writing_buffer_alignment[curr_pos_buffer++] = ' ';
|
|
712 writing_buffer_alignment[curr_pos_buffer++] = '\t';
|
|
713 while(before_i < i){
|
|
714 if(reconstruct_X[before_i] != '-' && reconstruct_Y[before_j] != '-' && reconstruct_X[before_i] == reconstruct_Y[before_j]){
|
|
715 //fprintf(out, "*");
|
|
716 writing_buffer_alignment[curr_pos_buffer++] = '*';
|
|
717 ba->identities++;
|
|
718 }else{
|
|
719 //fprintf(out, " ");
|
|
720 writing_buffer_alignment[curr_pos_buffer++] = ' ';
|
|
721 }
|
|
722 before_j++;
|
|
723 before_i++;
|
|
724 }
|
|
725 writing_buffer_alignment[curr_pos_buffer++] = '\n';
|
|
726
|
|
727 }
|
|
728 //fprintf(out, "\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n");
|
|
729 writing_buffer_alignment[curr_pos_buffer++] = '\n';
|
|
730 writing_buffer_alignment[curr_pos_buffer++] = '\0';
|
|
731
|
|
732
|
|
733 }
|
|
734
|
|
735 void alignmentFromQuickHits(SeqInfo * database, SeqInfo * query, uint64_t pos_database, uint64_t pos_query, uint64_t curr_read, uint64_t curr_db_seq, Quickfrag * qf, uint64_t offset_db_reads, uint64_t offset_db_coordinates){
|
|
736
|
|
737 int64_t read_x_start, read_x_end, read_y_start, read_y_end;
|
|
738
|
|
739 if(curr_db_seq == database->n_seqs-1){
|
|
740 read_x_start = database->start_pos[curr_db_seq];
|
|
741 read_x_end = database->total_len;
|
|
742 }else{
|
|
743 read_x_start = database->start_pos[curr_db_seq];
|
|
744 read_x_end = database->start_pos[curr_db_seq+1] - 1;
|
|
745 }
|
|
746
|
|
747 //printf("read x start -> %"PRId64", end -> %"PRId64" btw: %"PRIu64"\n", read_x_start, read_x_end, database->n_seqs);
|
|
748
|
|
749 if(curr_read == query->n_seqs-1){
|
|
750 read_y_start = query->start_pos[curr_read];
|
|
751 read_y_end = query->total_len;
|
|
752 }else{
|
|
753 read_y_start = query->start_pos[curr_read];
|
|
754 read_y_end = query->start_pos[curr_read+1] - 1;
|
|
755 }
|
|
756
|
|
757 //printf("db_end %"PRId64" query_end %"PRId64"\n", read_x_end, read_y_end);
|
|
758 //printf("pos database: %"PRIu64"\n", pos_database);
|
|
759 int64_t curr_pos_db = (int64_t) pos_database;
|
|
760 int64_t curr_pos_qy = (int64_t) pos_query;
|
|
761 int64_t final_end_x = (int64_t) pos_database - 1, final_start_x = final_end_x - custom_kmer + 1, final_start_y = pos_query - custom_kmer;
|
|
762 int64_t score_right = custom_kmer * POINT;
|
|
763 int64_t score_left = score_right;
|
|
764 int64_t high_left = score_left, high_right = score_right;
|
|
765 qf->t_len = custom_kmer;
|
|
766 uint64_t idents = custom_kmer;
|
|
767
|
|
768 /*
|
|
769 char le_hit[1000];
|
|
770 memcpy(le_hit, &database->sequences[final_start_x], FIXED_K);
|
|
771
|
|
772 fprintf(stdout, "HIT: %s\n", le_hit);
|
|
773 fflush(stdout);
|
|
774 */
|
|
775 //printf("final start x: %"PRId64"\n", final_start_x);
|
|
776 int keep_going = 1;
|
|
777
|
|
778 //Forward search
|
|
779 while(keep_going == 1){
|
|
780
|
|
781
|
|
782 if(score_right > 0 && curr_pos_db < database->total_len && curr_pos_qy < query->total_len){
|
|
783 if(curr_pos_db > read_x_end || curr_pos_qy > read_y_end) break;
|
|
784 //if(database->sequences[curr_pos_db] == query->sequences[curr_pos_qy]){ score_right+=POINT; idents++; }else{ score_right-=POINT;}
|
|
785 if(compare_letters(database->sequences[curr_pos_db], query->sequences[curr_pos_qy]) == POINT){ score_right+=POINT; idents++; }else{ score_right-=POINT;}
|
|
786 if(high_right <= score_right){
|
|
787 final_end_x = curr_pos_db;
|
|
788 high_right = score_right;
|
|
789 }
|
|
790 curr_pos_db++;
|
|
791 curr_pos_qy++;
|
|
792 }else{
|
|
793 keep_going = 0;
|
|
794 }
|
|
795 }
|
|
796
|
|
797 //printf("pos here %"PRIu64" curr_pos_db, curr_pos_query %"PRIu64"\n", curr_pos_db, curr_pos_qy);
|
|
798 //printf("final start x: %"PRId64"\n", final_start_x);
|
|
799 keep_going = 1;
|
|
800 curr_pos_db = pos_database - custom_kmer - 1;
|
|
801 curr_pos_qy = pos_query - custom_kmer - 1;
|
|
802
|
|
803 score_left = high_right;
|
|
804
|
|
805 //Backward search
|
|
806 while(keep_going == 1){
|
|
807
|
|
808 if(score_left > 0 && curr_pos_db >= 0 && curr_pos_qy >= 0){
|
|
809 if(curr_pos_db < read_x_start || curr_pos_qy < read_y_start ) break;
|
|
810 //if(database->sequences[curr_pos_db] == query->sequences[curr_pos_qy]){ score_left+=POINT; idents++; }else{ score_left-=POINT;}
|
|
811 if(compare_letters(database->sequences[curr_pos_db], query->sequences[curr_pos_qy]) == POINT){ score_left+=POINT; idents++; }else{ score_left-=POINT;}
|
|
812 if(high_left <= score_left){
|
|
813 final_start_x = curr_pos_db;
|
|
814 final_start_y = curr_pos_qy;
|
|
815 //printf("got %"PRIu64" when min is %"PRIu64"\n", final_start_y, read_y_start);
|
|
816 high_left = score_left;
|
|
817 }
|
|
818 curr_pos_db--;
|
|
819 curr_pos_qy--;
|
|
820 }else{
|
|
821 keep_going = 0;
|
|
822 }
|
|
823 }
|
|
824
|
|
825 qf->t_len = final_end_x - final_start_x;
|
|
826
|
|
827 /*
|
|
828 char s1[1000];
|
|
829 char s2[1000];
|
|
830
|
|
831 memcpy(s1, &database->sequences[final_start_x], qf->t_len);
|
|
832 memcpy(s2, &query->sequences[final_start_y], qf->t_len);
|
|
833 s1[qf->t_len] = '\0';
|
|
834 s2[qf->t_len] = '\0';
|
|
835
|
|
836 fprintf(stdout, "%s\n%s\n------\n", s1, s2);
|
|
837 fflush(stdout);
|
|
838
|
|
839 printf("the real hit was:\n");
|
|
840 memcpy(s1, &database->sequences[pos_database-12+1], 12);
|
|
841 memcpy(s2, &query->sequences[pos_query-12+1], 12);
|
|
842 s1[12] = '\0';
|
|
843 s2[12] = '\0';
|
|
844
|
|
845 fprintf(stdout, "%s\n%s\n------\n", s1, s2);
|
|
846 fflush(stdout);
|
|
847 //getchar();
|
|
848 printf("%"PRIu64"\n", idents);
|
|
849 */
|
|
850
|
|
851 long double rawscore = (idents*POINT) - (qf->t_len - idents)*(POINT);
|
|
852
|
|
853 long double t_len;
|
|
854 if(curr_read == query->n_seqs-1){
|
|
855 t_len = (long double) query->total_len - query->start_pos[curr_read];
|
|
856 }else{
|
|
857 t_len = (long double) query->start_pos[curr_read+1] - query->start_pos[curr_read];
|
|
858 }
|
|
859 //printf("final start x: %"PRId64"\n", final_start_x);
|
|
860 qf->x_start = final_start_x;
|
|
861 qf->y_start = final_start_y;
|
|
862 qf->e_value = (long double) QF_KARLIN*t_len*database->total_len*expl(-QF_LAMBDA * rawscore);
|
|
863 qf->coverage = qf->t_len / t_len;
|
|
864
|
|
865 }
|
|
866
|
|
867 void calculate_y_cell_path(Point p0, Point p1, Point p2, Point p3, int64_t * y_points){
|
|
868
|
|
869 //Calculate lines between points
|
|
870 uint64_t i;
|
|
871
|
|
872 #ifdef VERBOSE
|
|
873 printf("Built on\n");
|
|
874 printf("(%"PRIu64", %"PRIu64")\n", p0.x, p0.y);
|
|
875 printf("(%"PRIu64", %"PRIu64")\n", p1.x, p1.y);
|
|
876 printf("(%"PRIu64", %"PRIu64")\n", p2.x, p2.y);
|
|
877 printf("(%"PRIu64", %"PRIu64")\n", p3.x, p3.y);
|
|
878 #endif
|
|
879
|
|
880
|
|
881
|
|
882 if(p0.x > MAX_READ_SIZE){ fprintf(stdout, "LEN error %"PRIu64"\n", p0.x); terror("Reached max length in read for anchoring procedure (1)"); }
|
|
883 if(p1.x > MAX_READ_SIZE){ fprintf(stdout, "LEN error %"PRIu64"\n", p1.x); terror("Reached max length in read for anchoring procedure (2)"); }
|
|
884 if(p2.x > MAX_READ_SIZE){ fprintf(stdout, "LEN error %"PRIu64"\n", p2.x); terror("Reached max length in read for anchoring procedure (3)"); }
|
|
885 if(p3.x > MAX_READ_SIZE){ fprintf(stdout, "LEN error %"PRIu64"\n", p3.x); terror("Reached max length in read for anchoring procedure (4)"); }
|
|
886
|
|
887 long double deltax, deltay, deltaerr, error;
|
|
888 uint64_t y;
|
|
889
|
|
890 //P0 to P1
|
|
891 deltax = p1.x - p0.x;
|
|
892 deltay = p1.y - p0.y;
|
|
893 if(deltax != 0) deltaerr = fabsl(deltay/deltax); else deltaerr = 0;
|
|
894 //printf("Deltas x: %Le y: %Le Error: %Le\n", deltax, deltay, deltaerr);
|
|
895 error = deltaerr - 0.5;
|
|
896 y = p0.y;
|
|
897
|
|
898 for(i=p0.x;i<p1.x;i++){
|
|
899 y_points[i] = (int64_t) y;
|
|
900 error = error + deltaerr;
|
|
901 if(error >= 0.5){
|
|
902 y++;
|
|
903 error = error - 1;
|
|
904 }
|
|
905 }
|
|
906
|
|
907 //P1 to P2
|
|
908
|
|
909 deltax = p2.x - p1.x;
|
|
910 deltay = p2.y - p1.y;
|
|
911 if(deltax != 0) deltaerr = fabsl(deltay/deltax); else deltaerr = 0;
|
|
912 //printf("Deltas x: %Le y: %Le Error: %Le\n", deltax, deltay, deltaerr);
|
|
913 error = deltaerr - 0.5;
|
|
914 y = p1.y;
|
|
915
|
|
916 for(i=p1.x;i<p2.x;i++){
|
|
917 y_points[i] = (int64_t) y;
|
|
918 error = error + deltaerr;
|
|
919 if(error >= 0.5){
|
|
920 y++;
|
|
921 error = error - 1;
|
|
922 }
|
|
923 }
|
|
924
|
|
925 //P2 to P3
|
|
926
|
|
927 deltax = p3.x - p2.x;
|
|
928 deltay = p3.y - p2.y;
|
|
929 if(deltax != 0) deltaerr = fabsl(deltay/deltax); else deltaerr = 0;
|
|
930 //printf("Deltas x: %Le y: %Le Error: %Le\n", deltax, deltay, deltaerr);
|
|
931 error = deltaerr - 0.5;
|
|
932 y = p2.y;
|
|
933
|
|
934 for(i=p2.x;i<p3.x;i++){
|
|
935 y_points[i] = (int64_t) y;
|
|
936 error = error + deltaerr;
|
|
937 if(error >= 0.5){
|
|
938 y++;
|
|
939 error = error - 1;
|
|
940 }
|
|
941 }
|
|
942
|
|
943 /*
|
|
944 if(p3.x == 799 && p3.y == 2497){
|
|
945 for(i=0;i<p3.x;i++){
|
|
946 printf("%"PRIu64": %"PRIu64"\n", i, y_points[i]);
|
|
947 if(i % 50 == 0) getchar();
|
|
948 }
|
|
949 }
|
|
950 */
|
|
951
|
|
952 #ifdef VERBOSE
|
|
953 for(i=0;i<p3.x;i++){
|
|
954 printf("%"PRIu64" -> ", y_points[i]);
|
|
955 if(i % 50 == 0) getchar();
|
|
956 }
|
|
957
|
|
958 #endif
|
|
959
|
|
960
|
|
961
|
|
962 }
|
|
963
|
|
964 struct best_cell NW(unsigned char * X, uint64_t Xstart, uint64_t Xend, unsigned char * Y, uint64_t Ystart, uint64_t Yend, int64_t iGap, int64_t eGap, struct cell ** table, struct positioned_cell * mc, int show, int64_t * cell_path_y, long double * window, uint64_t * current_window_size){
|
|
965
|
|
966
|
|
967 uint64_t i, j, j_prime;
|
|
968 int64_t scoreDiagonal = INT64_MIN, scoreLeft = INT64_MIN, scoreRight = INT64_MIN, score = INT64_MIN, delta_dif_1_0, delta_dif_2_1, limit_left, limit_right, j_right_prime = 1, j_left_prime = 1, j_diag_prime = 1;
|
|
969
|
|
970 struct best_cell bc;
|
|
971 bc.c.score = INT64_MIN;
|
|
972 bc.c.xpos = 0; bc.c.ypos = 0;
|
|
973
|
|
974 //The window size will be a +-15% of the square root of the product of lengths
|
|
975 int64_t window_size = MIN(MAX_WINDOW_SIZE/2, (uint64_t) (*window * sqrtl((long double) Xend * (long double) Yend)));
|
|
976 //printf("xlen: %"PRIu64", ylen: %"PRIu64" w-size: %"PRId64"\n", Xend, Yend, window_size);
|
|
977 *current_window_size = (uint64_t) window_size;
|
|
978
|
|
979 //The limits to the window
|
|
980 limit_left = 0;
|
|
981 limit_right = 2*window_size + 1;
|
|
982 if(limit_right > MAX_WINDOW_SIZE) limit_right = MAX_WINDOW_SIZE;
|
|
983
|
|
984 struct positioned_cell mf;
|
|
985 mf.score = INT64_MIN;
|
|
986
|
|
987
|
|
988 //First row. iCounter serves as counter from zero
|
|
989 //printf("..0%%");
|
|
990 //Zero will always be
|
|
991 table[0][0].score = compare_letters(X[0], Y[0]);
|
|
992 mc[0].score = table[0][0].score;
|
|
993 mc[0].xpos = 0;
|
|
994 mc[0].ypos = 0;
|
|
995
|
|
996 //if(Xend == 799 && Yend == 2497) printf("I am %p The count is real %.5s %.5s %p %p \n", &table[0][0], X, Y, X, Y);
|
|
997
|
|
998
|
|
999 for(i=1;i<Yend;i++){
|
|
1000 //table[0][i].score = (X[0] == Y[i]) ? POINT : -POINT;
|
|
1001 if(i < MAX_WINDOW_SIZE) table[0][i].score = compare_letters(X[0], Y[i]) + iGap + (i-1)*eGap;
|
|
1002 //table[Xstart][i].xfrom = Xstart;
|
|
1003 //table[Xstart][i].yfrom = i;
|
|
1004 //Set every column max
|
|
1005 mc[i].score = compare_letters(X[0], Y[i]) + iGap + (i-1)*eGap;
|
|
1006 #ifdef VERBOSE
|
|
1007 printf("%02"PRId64" ", mc[i].score);
|
|
1008 #endif
|
|
1009 mc[i].xpos = 0;
|
|
1010 mc[i].ypos = i;
|
|
1011
|
|
1012 }
|
|
1013 #ifdef VERBOSE
|
|
1014 printf("\n");
|
|
1015 #endif
|
|
1016 //Set row max
|
|
1017 mf.score = table[0][0].score;
|
|
1018 mf.xpos = 0;
|
|
1019 mf.ypos = 0;
|
|
1020 //Init j
|
|
1021 j = MAX(1,(cell_path_y[1] - window_size));
|
|
1022
|
|
1023 //Go through full matrix
|
|
1024 for(i=1;i<Xend;i++){
|
|
1025 //Fill first rowcell
|
|
1026 if(cell_path_y[i-1]+window_size < cell_path_y[i]) return bc; //terror("Sequence proportions make window shift too large");
|
|
1027 //Conversion for the j-coordinate
|
|
1028 j_prime = 1;
|
|
1029
|
|
1030 //table[i][0].score = (X[i] == Y[0]) ? POINT : -POINT;
|
|
1031 if(cell_path_y[i] - window_size <= 0){
|
|
1032 table[i][0].score = compare_letters(X[i], Y[0]) + iGap + (i-1)*eGap;
|
|
1033 mf.score = table[i][0].score;
|
|
1034 }else{
|
|
1035 mf.score = compare_letters(X[i], Y[0]) + iGap + (i-1)*eGap;
|
|
1036 }
|
|
1037
|
|
1038 mf.xpos = i-1;
|
|
1039 mf.ypos = 0;
|
|
1040
|
|
1041 delta_dif_1_0 = MAX(1, (cell_path_y[i] - window_size)) - MAX(1,(cell_path_y[i-1] - window_size)); //j-1
|
|
1042 if(i>1) delta_dif_2_1 = MAX(1, (cell_path_y[i-1] - window_size)) - MAX(1, (cell_path_y[i-2] - window_size)); //j-2
|
|
1043
|
|
1044 #ifdef VERBOSE
|
|
1045 printf("D1_0: %"PRId64" D2_1: %"PRId64"\n", delta_dif_1_0, delta_dif_2_1);
|
|
1046 #endif
|
|
1047
|
|
1048 #ifdef VERBOSE
|
|
1049 printf("%02"PRId64" ", mf.score);
|
|
1050 #endif
|
|
1051 //printf("Check on i: (%"PRIu64") from - to (%"PRIu64", %"PRIu64")\n", i, 0L, Xend);
|
|
1052 /*
|
|
1053 if(1||i==262){
|
|
1054 printf("I will go from %"PRIu64" to %"PRIu64" and I am %"PRIu64", %"PRIu64"\n", (uint64_t) MAX(1,(cell_path_y[i] - (int64_t)window_size)), (uint64_t) MIN((int64_t)Yend,(cell_path_y[i] + (int64_t)window_size)), i, j);
|
|
1055 //printf("lengs: %"PRIu64", %"PRIu64"\n", Xend, Yend);
|
|
1056 //printf("cp[i]: %"PRId64", cp[i-1] %"PRId64"\n", cell_path_y[i], cell_path_y[i-1]);
|
|
1057 //printf("min(%"PRId64", %"PRId64" + %"PRId64")-------------------\n", Yend, cell_path_y[i] ,(int64_t)window_size);
|
|
1058
|
|
1059 }
|
|
1060 */
|
|
1061 //getchar();
|
|
1062
|
|
1063 //printf("@%"PRIu64"[%"PRId64"] -> (%"PRIu64", %"PRIu64") jp %"PRIu64", lright %"PRIu64"\n", i, cell_path_y[i], MAX(1,(cell_path_y[i] - window_size)), MIN(Yend,(cell_path_y[i] + window_size)), j_prime, limit_right);
|
|
1064 //printf("M:@%"PRIu64"-> %"PRIu64"\n", i, MIN(Yend,(cell_path_y[i] + window_size)));
|
|
1065 #ifdef VERBOSE
|
|
1066 int64_t r;
|
|
1067 for(r=0;r<MAX(0,(cell_path_y[i] - window_size)); r++){
|
|
1068 printf(" ");
|
|
1069 }
|
|
1070 #endif
|
|
1071
|
|
1072 /*
|
|
1073 if(Xend == 799 && Yend == 2497 && i >= 145 && i <= 155){
|
|
1074 printf("them limits @i %"PRIu64"::: %"PRIu64", %"PRIu64"\n", i, MAX(1,(cell_path_y[i] - window_size)), MIN(Yend,(cell_path_y[i] + window_size)));
|
|
1075 getchar();
|
|
1076 }
|
|
1077 */
|
|
1078
|
|
1079
|
|
1080 for(j=MAX(1,(cell_path_y[i] - window_size));j<MIN(Yend,(cell_path_y[i] + window_size)) && j_prime < limit_right;j++){
|
|
1081 //if(i == 8302){ printf("Doing on : (%"PRIu64",%"PRIu64" and jprime=%"PRIu64"\n", i,j,j_prime); getchar(); }
|
|
1082 //Check if max in row has changed
|
|
1083 //if(j > MAX(1, cell_path_y[i-1] - window_size +1) && mf.score <= table[i][j-2].score){
|
|
1084 //if(j_prime == MAX_WINDOW_SIZE) break;
|
|
1085 //Calculate the real j position in the windowed table
|
|
1086
|
|
1087 j_left_prime = ((int64_t)j_prime - (2 - delta_dif_1_0));
|
|
1088 //j_diag_prime = ((int64_t)j_prime - (1 - delta_dif_1_0));
|
|
1089 j_diag_prime = ((int64_t)j_prime - (1 - delta_dif_1_0));
|
|
1090 if(i > 1){
|
|
1091 j_right_prime = ((int64_t)j_prime - (1 - (delta_dif_1_0 + delta_dif_2_1)));
|
|
1092 }
|
|
1093
|
|
1094 if(j > MAX(1, cell_path_y[i-1] - window_size +1) && j < MIN(Yend,(cell_path_y[i-1] + window_size)) && j_left_prime < limit_right && table[i-1][j_left_prime].score >= mf.score){
|
|
1095 //mf.score = table[i-1][j-2].score;
|
|
1096 mf.score = table[i-1][j_left_prime].score;
|
|
1097 mf.xpos = i-1;
|
|
1098 mf.ypos = j-2;
|
|
1099 if(table[i-1][j_left_prime].score == INT64_MIN){ printf("A: mf.x\t%"PRIu64"\tmf.y\t%"PRIu64"\ts%"PRId64"\n", mf.xpos, mf.ypos, mf.score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); }
|
|
1100
|
|
1101 }
|
|
1102 //printf("RowMax: %"PRId64"@(%"PRIu64", %"PRIu64")\t", mf.score, mf.xpos, mf.ypos);
|
|
1103
|
|
1104 //score = (X[i] == Y[j]) ? POINT : -POINT;
|
|
1105 score = compare_letters(X[i], Y[j]);
|
|
1106
|
|
1107 //Precondition: Upper row needs to reach up to diagonal
|
|
1108 //if((cell_path_y[i-1]+window_size) >= j-1){
|
|
1109 if(i > 1 && j >= 1 && j-1 >= MAX(1,(cell_path_y[i-2] - window_size)) && j-1 < MIN(Yend,(cell_path_y[i-2] + window_size)) && j_right_prime >= limit_left && j_right_prime < limit_right && table[i-2][j_right_prime].score >= mc[j-1].score ){
|
|
1110 //mc[j-1].score = table[i-2][j-(1+j_prime)].score;
|
|
1111 //Should be the j_prime we had at cell_path_y
|
|
1112 //MAX(1,(cell_path_y[i] - window_size));j<MIN(Yend,(cell_path_y[i] + window_size))
|
|
1113
|
|
1114 mc[j-1].score = table[i-2][j_right_prime].score;
|
|
1115 mc[j-1].xpos = i-2;
|
|
1116 mc[j-1].ypos = j-1;
|
|
1117
|
|
1118 if(table[i-2][j_right_prime].score == INT64_MIN){ printf("A: j-1\t%"PRIu64"\tmc.xpos\t%"PRIu64"\ts%"PRId64"\n", j-1, mc[j-1].xpos, mc[j-1].score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); }
|
|
1119
|
|
1120 }
|
|
1121
|
|
1122
|
|
1123 if(j-1 >= MAX(0, (cell_path_y[i-1]-window_size)) && (cell_path_y[i-1]+window_size) >= j-1 && j_diag_prime >= limit_left && j_diag_prime < limit_right && j_diag_prime < cell_path_y[i-1]+window_size){
|
|
1124 //scoreDiagonal = table[i-1][j-1].score + score;
|
|
1125 //printf("prevdiag: %"PRId64"\n", table[i-1][j_diag_prime].score);
|
|
1126 scoreDiagonal = table[i-1][j_diag_prime].score + score;
|
|
1127 if(table[i-1][j_diag_prime].score == INT64_MIN){ printf("A: i-1\t%"PRIu64"\tj_diag\t%"PRIu64"\ts%"PRId64"\n", i-1, j_diag_prime, table[i-1][j_diag_prime].score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); }
|
|
1128 }else{
|
|
1129 scoreDiagonal = INT64_MIN;
|
|
1130 }
|
|
1131
|
|
1132 if(i>=1 && j>1){
|
|
1133 scoreLeft = mf.score + iGap + (j - (mf.ypos+2))*eGap + score;
|
|
1134
|
|
1135 if(mf.score == INT64_MIN){ printf("A: mf.x\t%"PRIu64"\tmf.y\t%"PRIu64"\ts%"PRId64"\n", mf.xpos, mf.ypos, mf.score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); }
|
|
1136 }else{
|
|
1137 scoreLeft = INT64_MIN;
|
|
1138 }
|
|
1139
|
|
1140 if(j>=1 && i>1){
|
|
1141 scoreRight = mc[j-1].score + iGap + (i - (mc[j-1].xpos+2))*eGap + score;
|
|
1142 //if(scoreRight == -12) printf("MC: %"PRId64", From: %"PRIu64", %"PRIu64"->", mc[j-1].score, mc[j-1].xpos, mc[j-1].ypos);
|
|
1143
|
|
1144 if(mc[j-1].score == INT64_MIN){ printf("A: j-1\t%"PRIu64"\tmc.xpos\t%"PRIu64"\ts%"PRId64"\n", j-1, mc[j-1].xpos, mc[j-1].score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); }
|
|
1145 }else{
|
|
1146 scoreRight = INT64_MIN;
|
|
1147 }
|
|
1148
|
|
1149 /*
|
|
1150 if(Xend == 799 && Yend == 2497 && i >= 152 && i == 153){
|
|
1151 printf("@%"PRIu64", %"PRIu64" -> scores: %"PRId64", %"PRId64", %"PRId64"\n", i, j, scoreDiagonal, scoreRight, scoreLeft);
|
|
1152 printf("in position @ jprime= %"PRIu64" cellpaths [i-2, i-1, i] are %"PRId64", %"PRId64", %"PRId64", window_size: %"PRId64", j_diag_prime: %"PRId64"\n", j_prime, cell_path_y[i-2], cell_path_y[i-1], cell_path_y[i], window_size, j_diag_prime);
|
|
1153 printf("Mfs from scoreLeft: mf.x\t%"PRIu64"\tmf.y\t%"PRIu64"\ts%"PRId64"\n", mf.xpos, mf.ypos, mf.score);
|
|
1154 getchar();
|
|
1155 }
|
|
1156 */
|
|
1157
|
|
1158 //Choose maximum
|
|
1159 /*
|
|
1160 #ifdef VERBOSE
|
|
1161 printf("The game starts at %"PRId64"\n", MAX(0, cell_path_y[i] - window_size));
|
|
1162 printf("from %c %c and I get to %"PRIu64" while j=%"PRIu64"\n", X[i], Y[j], j_prime, j);
|
|
1163 printf("j_prime: %"PRId64"\n", j_prime);
|
|
1164 printf("j_diag_prime: %"PRId64" limits[%"PRId64", %"PRId64"]\n", j_diag_prime, limit_left, limit_right);
|
|
1165 printf("Score DIAG: %"PRId64"; LEFT: %"PRId64"; RIGHT: %"PRId64"\n", scoreDiagonal, scoreLeft, scoreRight);
|
|
1166 printf("currmf: %"PRId64" mc: %"PRId64"\n", mf.score, mc[j-1].score);
|
|
1167 #endif
|
|
1168 */
|
|
1169
|
|
1170
|
|
1171 //if(i >= MAX_READ_SIZE){ printf("i=%"PRIu64"\n", i); terror("i overflowed\n");}
|
|
1172 //if(j_prime >= MAX_WINDOW_SIZE){ printf("upper : %"PRId64"\n", MIN(Yend,(cell_path_y[i] + window_size-1))); printf("jp=%"PRIu64"\n", j_prime); terror("j overflowed\n"); }
|
|
1173
|
|
1174
|
|
1175
|
|
1176 if(scoreDiagonal >= scoreLeft && scoreDiagonal >= scoreRight){
|
|
1177 //Diagonal
|
|
1178
|
|
1179 //fprintf(stdout, "The JPRIME: %"PRId64" actual pos: %"PRIu64"\n", j_prime, j); getchar();
|
|
1180 table[i][j_prime].score = scoreDiagonal;
|
|
1181 table[i][j_prime].xfrom = i-1;
|
|
1182 table[i][j_prime].yfrom = j-1;
|
|
1183
|
|
1184
|
|
1185 }else if(scoreRight > scoreLeft){
|
|
1186 table[i][j_prime].score = scoreRight;
|
|
1187 table[i][j_prime].xfrom = mc[j-1].xpos;
|
|
1188 table[i][j_prime].yfrom = mc[j-1].ypos;
|
|
1189
|
|
1190 }else{
|
|
1191 //printf("Scores %"PRId64", %"PRId64", %"PRId64"\n", scoreDiagonal, scoreLeft, scoreRight);
|
|
1192 table[i][j_prime].score = scoreLeft;
|
|
1193 table[i][j_prime].xfrom = mf.xpos;
|
|
1194 table[i][j_prime].yfrom = mf.ypos;
|
|
1195 }
|
|
1196 //printf("F: i\t%"PRIu64"\tj_prime\t%"PRIu64"\n", i, j_prime);
|
|
1197 //getchar();
|
|
1198 //if(i == 94){ printf("showing j %"PRIu64" jprime %"PRIu64" lleft %"PRIu64", llright %"PRIu64"\n", j, j_prime, limit_left, limit_right); getchar(); }
|
|
1199 //if(i == 94 && j == 374){ printf("stopped at 94, 374 s %"PRId64"\n", table[i][j_prime].score); getchar(); }
|
|
1200
|
|
1201
|
|
1202
|
|
1203
|
|
1204 /*
|
|
1205 if(i == 264 && j == 176){
|
|
1206 printf("@%"PRIu64", %"PRIu64"\n", i, j);
|
|
1207 printf("my score is %"PRId64"\n", mc[j-1].score);
|
|
1208 printf("in position @ jprime= %"PRIu64" cellpaths [i-1, i] are %"PRId64", %"PRId64"\n", j_prime, cell_path_y[i-1], cell_path_y[i]);
|
|
1209 printf("Scores %"PRId64", %"PRId64", %"PRId64"\n", scoreDiagonal, scoreLeft, scoreRight);
|
|
1210 printf("check j_right_prime == %"PRIu64"\n", j_right_prime);
|
|
1211 getchar();
|
|
1212 //exit(-1);
|
|
1213 }
|
|
1214 */
|
|
1215
|
|
1216 //check if column max has changed
|
|
1217 //New condition: check if you filled i-2, j-1
|
|
1218
|
|
1219
|
|
1220 if(i == Xend-1 || j == Yend-1){
|
|
1221
|
|
1222 if(i == Xend-1 && j != Yend-1){
|
|
1223 table[i][j_prime].score = table[i][j_prime].score + iGap + (Yend - j)*eGap;
|
|
1224 }else if(j == Yend-1 && i != Xend-1){
|
|
1225 table[i][j_prime].score = table[i][j_prime].score + iGap + (Xend - i)*eGap;
|
|
1226 }
|
|
1227 //Check for best cell
|
|
1228 if(table[i][j_prime].score >= bc.c.score){
|
|
1229
|
|
1230 /*
|
|
1231 if(i == 798 && j == 1052){ // yields 799, 2497
|
|
1232 printf("in position @ jprime= %"PRIu64" cellpaths [i-1, i] are %"PRId64", %"PRId64"\n", j_prime, cell_path_y[i-1], cell_path_y[i]);
|
|
1233 printf("Scores %"PRId64", %"PRId64", %"PRId64"\n", scoreDiagonal, scoreLeft, scoreRight);
|
|
1234 printf("score comes from %"PRIu64", %"PRIu64",\n", mc[j-1].xpos, mc[j-1].ypos);
|
|
1235 printf("IDlengths: %"PRIu64", %"PRIu64"\n", Xend, Yend);
|
|
1236
|
|
1237 //exit(-1);
|
|
1238 }
|
|
1239 */
|
|
1240
|
|
1241 bc.c.score = table[i][j_prime].score; bc.c.xpos = i; bc.c.ypos = j; bc.j_prime = j_prime;
|
|
1242 }
|
|
1243 //bc.c.score = table[i][j_prime].score; bc.c.xpos = i; bc.c.ypos = j; bc.j_prime = j_prime;
|
|
1244 }
|
|
1245
|
|
1246
|
|
1247 #ifdef VERBOSE
|
|
1248 //printf("Put score: %"PRId64"\n\n", table[i][j_prime].score);
|
|
1249 //printf("(%"PRId64")%02"PRId64" ", j_diag_prime, table[i][j_prime].score); //printf("->(%"PRIu64", %"PRIu64")", i, j); printf("[%c %c]", X[i], Y[j]);
|
|
1250 //if(scoreDiagonal >= scoreLeft && scoreDiagonal >= scoreRight) printf("*\t");
|
|
1251 //else if(scoreRight > scoreLeft) printf("{\t"); else printf("}\t");
|
|
1252 //getchar();
|
|
1253
|
|
1254 #endif
|
|
1255 j_prime++;
|
|
1256 }
|
|
1257 #ifdef VERBOSE
|
|
1258 printf("\n");
|
|
1259 getchar();
|
|
1260 #endif
|
|
1261 }
|
|
1262
|
|
1263 return bc;
|
|
1264 }
|
|
1265
|
|
1266
|
|
1267
|
|
1268 void backtrackingNW(unsigned char * X, uint64_t Xstart, uint64_t Xend, unsigned char * Y, uint64_t Ystart, uint64_t Yend, struct cell ** table, char * rec_X, char * rec_Y, struct best_cell * bc, uint64_t * ret_head_x, uint64_t * ret_head_y, BasicAlignment * ba, int64_t * cell_path_y, uint64_t window_size){
|
|
1269 uint64_t curr_x, curr_y, prev_x, prev_y, head_x, head_y, limit_x, limit_y;
|
|
1270 int64_t k, j_prime, delta_diff = 0;
|
|
1271
|
|
1272 //limit_x = 2*MAX_READ_SIZE-1;
|
|
1273 //limit_y = limit_x;
|
|
1274 limit_x = 2*MAX(Xend, Yend);
|
|
1275 limit_y = limit_x;
|
|
1276 //head_x = 2*MAX(Xend, Yend);
|
|
1277 //head_y = 2*MAX(Xend, Yend);
|
|
1278 head_x = limit_x;
|
|
1279 head_y = limit_y;
|
|
1280 curr_x = bc->c.xpos;
|
|
1281 curr_y = bc->c.ypos;
|
|
1282 #ifdef VERBOSE
|
|
1283 printf("Optimum : %"PRIu64", %"PRIu64"\n", curr_x, curr_y);
|
|
1284 #endif
|
|
1285 //printf("Optimum : %"PRIu64", %"PRIu64"\n", curr_x, curr_y);
|
|
1286
|
|
1287 prev_x = curr_x;
|
|
1288 prev_y = curr_y;
|
|
1289 int show = 0;
|
|
1290
|
|
1291 for(k=Xend-1; k>curr_x; k--) rec_X[head_x--] = '-';
|
|
1292 for(k=Yend-1; k>curr_y; k--) rec_Y[head_y--] = '-';
|
|
1293
|
|
1294 j_prime = bc->j_prime;
|
|
1295 //printf("init prime: %"PRIu64"\n", j_prime);
|
|
1296 unsigned char first_track = 1;
|
|
1297
|
|
1298 while(curr_x > 0 && curr_y > 0){
|
|
1299
|
|
1300
|
|
1301 if(first_track == 0){
|
|
1302 delta_diff = MAX(1, cell_path_y[prev_x] - (int64_t) window_size) - MAX(1, cell_path_y[curr_x] - (int64_t)window_size); //j-1
|
|
1303 j_prime = MAX(0, j_prime - (int64_t)(prev_y - curr_y) + (int64_t) delta_diff);
|
|
1304
|
|
1305
|
|
1306 if(/*(bc->c.xpos == 630 && bc->c.ypos == 541 )||*/ j_prime > MAX_WINDOW_SIZE){
|
|
1307
|
|
1308 printf("from %"PRIu64", %"PRIu64"\nto %"PRIu64", %"PRIu64"\n", prev_x, prev_y, curr_x, curr_y);
|
|
1309 printf("jp: %"PRIu64", py,cy %"PRIu64", %"PRIu64", delta: %"PRId64"\n", j_prime, prev_y, curr_y, (int64_t)delta_diff);
|
|
1310 printf("currx curry : %"PRIu64", %"PRIu64"\n", curr_x, curr_y);
|
|
1311 printf("window size: %"PRIu64"\n", window_size);
|
|
1312 printf("cp[prev, curr] : %"PRId64", %"PRId64"\n", cell_path_y[prev_x], cell_path_y[curr_x]);
|
|
1313 printf("my cell path: %"PRId64"\n", cell_path_y[curr_x]);
|
|
1314 printf("Optimum : %"PRIu64", %"PRIu64"\n", bc->c.xpos, bc->c.ypos);
|
|
1315 getchar();
|
|
1316 }
|
|
1317
|
|
1318 //j_prime = j_prime - (int64_t)(prev_y - curr_y) + (int64_t) delta_diff;
|
|
1319
|
|
1320 prev_x = curr_x;
|
|
1321 prev_y = curr_y;
|
|
1322
|
|
1323 /*
|
|
1324 if(bc->c.xpos == 798 && bc->c.ypos == 1052){
|
|
1325 printf("[%c %c]", X[prev_x], Y[prev_y]);
|
|
1326 printf("(%"PRIu64", %"PRIu64") ::: \n", curr_x, curr_y);
|
|
1327
|
|
1328 }
|
|
1329 */
|
|
1330
|
|
1331 #ifdef VERBOSE
|
|
1332 //printf("Jprime: %"PRId64" :DELTADIF:%"PRId64"\n", j_prime, delta_diff);
|
|
1333 printf("[%c %c]", X[prev_x], Y[prev_y]);
|
|
1334 printf("(%"PRIu64", %"PRIu64") ::: \n", curr_x, curr_y);
|
|
1335 //printf("(%"PRIu64", %"PRIu64") ::: \n", prev_x, prev_y);
|
|
1336 //printf("cellp Prev: %"PRId64" Post: %"PRId64"\n", cell_path_y[prev_x], cell_path_y[curr_x]);
|
|
1337 //printf("the difs? %"PRId64" the other: %"PRId64"\n", MAX(0, cell_path_y[prev_x] - (int64_t) window_size), MAX(0, cell_path_y[curr_x] - (int64_t)window_size));
|
|
1338 getchar();
|
|
1339 #endif
|
|
1340
|
|
1341 }
|
|
1342
|
|
1343 //if(table[prev_x][j_prime].xfrom > MAX_READ_SIZE || table[prev_x][j_prime].yfrom > MAX_WINDOW_SIZE) fprintf(stdout, "OH NOES !! %"PRIu64"\t%"PRId64"\t%"PRIu64"\t%"PRIu64" dangers: %"PRIu64", %"PRIu64"\n", prev_x, j_prime, Xend, Yend, table[prev_x][j_prime].xfrom, table[prev_x][j_prime].yfrom);
|
|
1344
|
|
1345 /*
|
|
1346 if(table[prev_x][j_prime].xfrom > MAX_READ_SIZE || table[prev_x][j_prime].yfrom > MAX_WINDOW_SIZE){
|
|
1347 fprintf(stdout, "OH NOES !! %"PRIu64"\t%"PRId64"\t%"PRIu64"\t%"PRIu64" dangers: %"PRIu64", %"PRIu64"\n", prev_x, j_prime, Xend, Yend, table[prev_x][j_prime].xfrom, table[prev_x][j_prime].yfrom);
|
|
1348 uint64_t k;
|
|
1349 for(k=0;k<Xend;k++){
|
|
1350 fprintf(stdout, "%c", X[k]);
|
|
1351 }
|
|
1352 fprintf(stdout, "\n");
|
|
1353 for(k=0;k<Yend;k++){
|
|
1354 fprintf(stdout, "%c", Y[k]);
|
|
1355 }
|
|
1356 fprintf(stdout, "\n");
|
|
1357 show = 1;
|
|
1358 }
|
|
1359 */
|
|
1360 if(j_prime >= MAX_WINDOW_SIZE) printf("j_prime:overflow %"PRIu64"\n", j_prime);
|
|
1361
|
|
1362
|
|
1363 curr_x = table[prev_x][j_prime].xfrom;
|
|
1364 curr_y = table[prev_x][j_prime].yfrom;
|
|
1365 first_track = 0;
|
|
1366
|
|
1367 //printf("w: %"PRIu64"- %"PRIu64"\n", curr_x, curr_y);
|
|
1368
|
|
1369
|
|
1370 if((curr_x == (prev_x - 1)) && (curr_y == (prev_y -1))){
|
|
1371 //Diagonal case
|
|
1372 //printf("DIAG\n");
|
|
1373 if(head_x == 0 || head_y == 0) goto exit_point;
|
|
1374 rec_X[head_x--] = (char) X[prev_x];
|
|
1375 rec_Y[head_y--] = (char) Y[prev_y];
|
|
1376 ba->length++;
|
|
1377
|
|
1378 }else if((prev_x - curr_x) > (prev_y - curr_y)){
|
|
1379 //Gap in X
|
|
1380 //printf("Gap X\n");
|
|
1381 if(head_x == 0 || head_y == 0) goto exit_point;
|
|
1382 if(bc->c.xpos != prev_x && bc->c.ypos != prev_y){
|
|
1383 rec_Y[head_y--] = Y[prev_y];
|
|
1384 rec_X[head_x--] = X[prev_x];
|
|
1385 }else{
|
|
1386 rec_Y[head_y--] = '-';
|
|
1387 rec_X[head_x--] = X[prev_x];
|
|
1388 }
|
|
1389 ba->length++;
|
|
1390
|
|
1391 for(k=prev_x-1;k>curr_x;k--){
|
|
1392 if(head_x == 0 || head_y == 0) goto exit_point;
|
|
1393 #ifdef VERBOSE
|
|
1394 if(head_x == 0 || head_y == 0){
|
|
1395 printf("%"PRIu64" %"PRIu64" and prevs are %"PRIu64" %"PRIu64"\n", head_x, head_y, prev_x, prev_y);
|
|
1396 printf("origin is %"PRIu64", %"PRIu64"\n", bc->c.xpos, bc->c.ypos);
|
|
1397 uint64_t z;
|
|
1398 for(z=head_x;z<limit_x;z++){
|
|
1399 fprintf(stdout, "%c", (char) rec_X[z]);
|
|
1400 }
|
|
1401 printf("\n");
|
|
1402 for(z=head_y;z<limit_y;z++){
|
|
1403 fprintf(stdout, "%c", (char) rec_Y[z]);
|
|
1404 }
|
|
1405 getchar();
|
|
1406 }
|
|
1407 #endif
|
|
1408 rec_Y[head_y--] = '-';
|
|
1409 rec_X[head_x--] = (char) X[k];
|
|
1410 ba->length++;
|
|
1411 ba->egaps++;
|
|
1412 }
|
|
1413 ba->igaps += 1;
|
|
1414 ba->egaps--;
|
|
1415 }else{
|
|
1416 //Gap in Y
|
|
1417 //printf("GAP Y\n");
|
|
1418 //10, 0, 401, 281
|
|
1419 if(head_x == 0 || head_y == 0) goto exit_point;
|
|
1420 if(bc->c.xpos != prev_x && bc->c.ypos != prev_y){
|
|
1421 rec_Y[head_y--] = Y[prev_y];
|
|
1422 rec_X[head_x--] = X[prev_x];
|
|
1423 }else{
|
|
1424 rec_Y[head_y--] = Y[prev_y];
|
|
1425 rec_X[head_x--] = '-';
|
|
1426 }
|
|
1427 ba->length++;
|
|
1428
|
|
1429 for(k=prev_y-1;k>curr_y;k--){
|
|
1430 if(head_x == 0 || head_y == 0) goto exit_point;
|
|
1431 #ifdef VERBOSE
|
|
1432 if(head_x == 0 || head_y == 0){
|
|
1433 printf("%"PRIu64" %"PRIu64" and prevs are %"PRIu64" %"PRIu64"\n", head_x, head_y, prev_x, prev_y);
|
|
1434 printf("origin is %"PRIu64", %"PRIu64"\n", bc->c.xpos, bc->c.ypos);
|
|
1435 uint64_t z;
|
|
1436 for(z=head_x;z<limit_x;z++){
|
|
1437 fprintf(stdout, "%c", (char) rec_X[z]);
|
|
1438 }
|
|
1439 printf("\n");
|
|
1440 for(z=head_y;z<limit_y;z++){
|
|
1441 fprintf(stdout, "%c", (char) rec_Y[z]);
|
|
1442 }
|
|
1443 getchar();
|
|
1444 }
|
|
1445 #endif
|
|
1446 rec_X[head_x--] = '-';
|
|
1447 rec_Y[head_y--] = (char) Y[k];
|
|
1448 ba->length++;
|
|
1449 ba->egaps++;
|
|
1450 }
|
|
1451
|
|
1452 ba->igaps += 1;
|
|
1453 ba->egaps--;
|
|
1454 }
|
|
1455
|
|
1456 }
|
|
1457
|
|
1458 if(curr_x == 0 && curr_y == 0 && (curr_x == (prev_x - 1)) && (curr_y == (prev_y -1))){
|
|
1459 rec_X[head_x--] = (char) X[curr_x];
|
|
1460 rec_Y[head_y--] = (char) Y[curr_y];
|
|
1461 ba->length++;
|
|
1462 }
|
|
1463
|
|
1464 exit_point:
|
|
1465
|
|
1466 //printf("curr: %"PRIu64", %"PRIu64"\n", curr_x, curr_y);
|
|
1467 //printf("Heads: %"PRIu64", %"PRIu64"\n", head_x, head_y);
|
|
1468 if(show == 1)fprintf(stdout, "%"PRIu64", %"PRIu64"\n", head_x, head_y);
|
|
1469 uint64_t huecos_x = 0, huecos_y = 0;
|
|
1470 k=(int64_t)curr_x-1;
|
|
1471 while(k>=0){ if(head_x == 0) break; rec_X[head_x--] = '-'; huecos_x++; k--; }
|
|
1472 k=(int64_t)curr_y-1;
|
|
1473 while(k>=0){ if(head_y == 0) break; rec_Y[head_y--] = '-'; huecos_y++; k--; }
|
|
1474
|
|
1475 if(show == 1)fprintf(stdout, "%"PRIu64", %"PRIu64"\n", head_x, head_y);
|
|
1476
|
|
1477 if(huecos_x >= huecos_y){
|
|
1478 while(huecos_x > 0) { if(head_y == 0) break; rec_Y[head_y--] = ' '; huecos_x--;}
|
|
1479 }else{
|
|
1480 while(huecos_y > 0) { if(head_x == 0) break; rec_X[head_x--] = ' '; huecos_y--;}
|
|
1481 }
|
|
1482
|
|
1483 if(show == 1){
|
|
1484 fprintf(stdout, "%"PRIu64", %"PRIu64"\n", head_x, head_y);
|
|
1485 fprintf(stdout, "%"PRIu64", %"PRIu64"\n", 2*Xend, 2*Yend);
|
|
1486 uint64_t k;
|
|
1487 for(k=head_x;k<limit_x;k++){
|
|
1488 fprintf(stdout, "%c", (char) rec_X[k]);
|
|
1489 }
|
|
1490 printf("\n");
|
|
1491 for(k=head_y;k<limit_y;k++){
|
|
1492 fprintf(stdout, "%c", (char) rec_Y[k]);
|
|
1493 }
|
|
1494 printf("\n");
|
|
1495 getchar();
|
|
1496 }
|
|
1497
|
|
1498 *ret_head_x = head_x;
|
|
1499 *ret_head_y = head_y;
|
|
1500 #ifdef VERBOSE
|
|
1501 printf("hx hy: %"PRIu64", %"PRIu64"\n", head_x, head_y);
|
|
1502 #endif
|
|
1503 }
|
|
1504
|
|
1505
|
|
1506 AVLTree * getNewLocationAVLTree(Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t key){
|
|
1507
|
|
1508 if(mp[*n_pools_used].current == POOL_SIZE){
|
|
1509 *n_pools_used += 1;
|
|
1510 if(*n_pools_used == MAX_MEM_POOLS) terror("Reached max pools");
|
|
1511 init_mem_pool_AVL(&mp[*n_pools_used]);
|
|
1512
|
|
1513 }
|
|
1514
|
|
1515 AVLTree * new_pos = mp[*n_pools_used].base + mp[*n_pools_used].current;
|
|
1516 mp[*n_pools_used].current++;
|
|
1517
|
|
1518 new_pos->key = key;
|
|
1519 new_pos->count = 1;
|
|
1520 new_pos->height = 1;
|
|
1521
|
|
1522 return new_pos;
|
|
1523 }
|
|
1524
|
|
1525 void init_mem_pool_AVL(Mempool_AVL * mp){
|
|
1526 mp->base = (AVLTree *) calloc(POOL_SIZE, sizeof(AVLTree));
|
|
1527 if(mp->base == NULL) terror("Could not request memory pool");
|
|
1528 mp->current = 0;
|
|
1529 }
|
|
1530
|
|
1531
|
|
1532
|
|
1533 /*
|
|
1534 // An AVL tree node
|
|
1535 typedef struct AVL_Node{
|
|
1536 uint64_t key;
|
|
1537 struct AVL_Node * left;
|
|
1538 struct AVL_Node * right;
|
|
1539 uint64_t height;
|
|
1540 llpos * next;
|
|
1541 } AVLTree;
|
|
1542 */
|
|
1543
|
|
1544 // A utility function to get height of the tree
|
|
1545
|
|
1546 uint64_t height(AVLTree * N){
|
|
1547 if (N == NULL)
|
|
1548 return 0;
|
|
1549 return N->height;
|
|
1550 }
|
|
1551
|
|
1552 /* Substituted by (x == NULL) ? (0) : (x->height) */
|
|
1553
|
|
1554 /* Helper function that allocates a new node with the given key and
|
|
1555 NULL left and right pointers. */
|
|
1556
|
|
1557 /* This one is substituted by AVLTree * getNewLocationAVLTree(Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t key) */
|
|
1558
|
|
1559 // A utility function to right rotate subtree rooted with y
|
|
1560 // See the diagram given above.
|
|
1561 AVLTree * right_rotate(AVLTree * y){
|
|
1562 AVLTree * x = y->left;
|
|
1563 AVLTree * T2 = x->right;
|
|
1564
|
|
1565 // Perform rotation
|
|
1566 x->right = y;
|
|
1567 y->left = T2;
|
|
1568
|
|
1569 // Update heights
|
|
1570 //x->height = MAX((x == NULL) ? (0) : (x->left->height), (x == NULL) ? (0) : (x->right->height))+1;
|
|
1571 //y->height = MAX((y == NULL) ? (0) : (y->left->height), (y == NULL) ? (0) : (y->right->height))+1;
|
|
1572 // Update heights
|
|
1573 y->height = MAX(height(y->left), height(y->right))+1;
|
|
1574 x->height = MAX(height(x->left), height(x->right))+1;
|
|
1575
|
|
1576 // Return new root
|
|
1577 return x;
|
|
1578 }
|
|
1579
|
|
1580 // A utility function to left rotate subtree rooted with x
|
|
1581 // See the diagram given above.
|
|
1582 AVLTree * left_rotate(AVLTree * x){
|
|
1583 AVLTree * y = x->right;
|
|
1584 AVLTree * T2 = y->left;
|
|
1585
|
|
1586 // Perform rotation
|
|
1587 y->left = x;
|
|
1588 x->right = T2;
|
|
1589
|
|
1590 // Update heights
|
|
1591 //x->height = MAX((x == NULL) ? (0) : (x->left->height), (x == NULL) ? (0) : (x->right->height))+1;
|
|
1592 //y->height = MAX((y == NULL) ? (0) : (y->left->height), (y == NULL) ? (0) : (y->right->height))+1;
|
|
1593 x->height = MAX(height(x->left), height(x->right))+1;
|
|
1594 y->height = MAX(height(y->left), height(y->right))+1;
|
|
1595
|
|
1596 // Return new root
|
|
1597 return y;
|
|
1598 }
|
|
1599
|
|
1600 // Get Balance factor of node N
|
|
1601
|
|
1602 int64_t get_balance(AVLTree * N){
|
|
1603 if (N == NULL)
|
|
1604 return 0;
|
|
1605 return height(N->left) - height(N->right);
|
|
1606 }
|
|
1607
|
|
1608 /* Substituted by (node == NULL) ? (0) : ((int64_t) node->left->height - (int64_t) node->right->height) */
|
|
1609
|
|
1610 AVLTree * find_AVLTree(AVLTree * node, uint64_t key){
|
|
1611 AVLTree * found = NULL;
|
|
1612 if(node == NULL) return NULL;
|
|
1613
|
|
1614 if (key < node->key) {
|
|
1615 found = find_AVLTree(node->left, key);
|
|
1616 } else if (key > node->key) {
|
|
1617 found = find_AVLTree(node->right, key);
|
|
1618 } else {
|
|
1619 return node;
|
|
1620 }
|
|
1621 return found;
|
|
1622 }
|
|
1623
|
|
1624 llpos * find_AVLTree_llpos(AVLTree * node, uint64_t key){
|
|
1625 llpos * aux = NULL;
|
|
1626 if(node == NULL) return NULL;
|
|
1627
|
|
1628 if (key < node->key) {
|
|
1629 aux = find_AVLTree_llpos(node->left, key);
|
|
1630 } else if (key > node->key) {
|
|
1631 aux = find_AVLTree_llpos(node->right, key);
|
|
1632 } else {
|
|
1633 return node->next;
|
|
1634 }
|
|
1635 return aux;
|
|
1636 }
|
|
1637
|
|
1638 // Recursive function to insert key in subtree rooted
|
|
1639 // with node and returns new root of subtree.
|
|
1640 AVLTree * insert_AVLTree(AVLTree * node, uint64_t key, Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t pos, Mempool_l * mp_l, uint64_t * n_pools_used_l, uint64_t s_id){
|
|
1641 /* 1. Perform the normal BST insertion */
|
|
1642 if (node == NULL){
|
|
1643
|
|
1644 AVLTree * n_node = getNewLocationAVLTree(mp, n_pools_used, key);
|
|
1645 llpos * aux = getNewLocationllpos(mp_l, n_pools_used_l);
|
|
1646 aux->pos = pos;
|
|
1647 aux->s_id = s_id;
|
|
1648 n_node->next = aux;
|
|
1649 return n_node;
|
|
1650 }
|
|
1651
|
|
1652 if (key < node->key) {
|
|
1653 node->left = insert_AVLTree(node->left, key, mp, n_pools_used, pos, mp_l, n_pools_used_l, s_id);
|
|
1654 } else if (key > node->key) {
|
|
1655 node->right = insert_AVLTree(node->right, key, mp, n_pools_used, pos, mp_l, n_pools_used_l, s_id);
|
|
1656 } else {
|
|
1657 // Equal keys are inserted as a linked list
|
|
1658 llpos * aux = getNewLocationllpos(mp_l, n_pools_used_l);
|
|
1659 aux->pos = pos;
|
|
1660 aux->s_id = s_id;
|
|
1661 aux->next = node->next;
|
|
1662 node->next = aux;
|
|
1663 ++(node->count);
|
|
1664 return node;
|
|
1665 }
|
|
1666
|
|
1667 /* 2. Update height of this ancestor node */
|
|
1668 //node->height = 1 + MAX((node->left == NULL) ? (0) : (node->left->height), (node->right == NULL) ? (0) : (node->right->height));
|
|
1669 node->height = 1 + MAX(height(node->left), height(node->right));
|
|
1670
|
|
1671 /* 3. Get the balance factor of this ancestor
|
|
1672 node to check whether this node became
|
|
1673 unbalanced */
|
|
1674 //int64_t balance = (node->left == NULL || node->right == NULL) ? (0) : ((int64_t) node->left->height - (int64_t) node->right->height);
|
|
1675 int64_t balance = get_balance(node);
|
|
1676
|
|
1677 // If this node becomes unbalanced, then
|
|
1678 // there are 4 cases
|
|
1679
|
|
1680 // Left Left Case
|
|
1681 if (balance > 1 && key < node->left->key)
|
|
1682 return right_rotate(node);
|
|
1683
|
|
1684 // Right Right Case
|
|
1685 if (balance < -1 && key > node->right->key)
|
|
1686 return left_rotate(node);
|
|
1687
|
|
1688 // Left Right Case
|
|
1689 if (balance > 1 && key > node->left->key)
|
|
1690 {
|
|
1691 node->left = left_rotate(node->left);
|
|
1692 return right_rotate(node);
|
|
1693 }
|
|
1694
|
|
1695 // Right Left Case
|
|
1696 if (balance < -1 && key < node->right->key)
|
|
1697 {
|
|
1698 node->right = right_rotate(node->right);
|
|
1699 return left_rotate(node);
|
|
1700 }
|
|
1701
|
|
1702 /* return the (unchanged) node pointer */
|
|
1703 return node;
|
|
1704 }
|
|
1705
|
|
1706 // A utility function to print preorder traversal
|
|
1707 // of the tree.
|
|
1708 // The function also prints height of every node
|
|
1709
|
|
1710 void pre_order(AVLTree * root){
|
|
1711 if(root != NULL){
|
|
1712 printf("%"PRIu64" ", root->key);
|
|
1713 llpos * aux = root->next;
|
|
1714 while(aux != NULL){ printf("#%"PRIu64", ", aux->pos); aux = aux->next; }
|
|
1715 pre_order(root->left);
|
|
1716 pre_order(root->right);
|
|
1717 }
|
|
1718 } |