Mercurial > repos > bitlab > imsame
comparison IMSAME/src/alignmentFunctions.c @ 0:762009a91895 draft
Uploaded
| author | bitlab |
|---|---|
| date | Sat, 15 Dec 2018 18:04:10 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:762009a91895 |
|---|---|
| 1 #include <stdio.h> | |
| 2 #include <stdlib.h> | |
| 3 #include <string.h> | |
| 4 #include <ctype.h> | |
| 5 #include <pthread.h> | |
| 6 #include <inttypes.h> | |
| 7 #include <math.h> | |
| 8 #include <float.h> | |
| 9 #include "structs.h" | |
| 10 #include "alignmentFunctions.h" | |
| 11 #include "commonFunctions.h" | |
| 12 #define MAX(x, y) (((x) > (y)) ? (x) : (y)) | |
| 13 #define MIN(x, y) (((x) <= (y)) ? (x) : (y)) | |
| 14 | |
| 15 int64_t compare_letters(unsigned char a, unsigned char b){ | |
| 16 if(a != (unsigned char) 'N' && a != (unsigned char) '>') return (a == b) ? POINT : -POINT; | |
| 17 return -POINT; | |
| 18 } | |
| 19 | |
| 20 llpos * getNewLocationllpos(Mempool_l * mp, uint64_t * n_pools_used){ | |
| 21 | |
| 22 if(mp[*n_pools_used].current == POOL_SIZE){ | |
| 23 *n_pools_used += 1; | |
| 24 if(*n_pools_used == MAX_MEM_POOLS) terror("Reached max pools"); | |
| 25 init_mem_pool_llpos(&mp[*n_pools_used]); | |
| 26 | |
| 27 } | |
| 28 | |
| 29 llpos * new_pos = mp[*n_pools_used].base + mp[*n_pools_used].current; | |
| 30 mp[*n_pools_used].current++; | |
| 31 | |
| 32 | |
| 33 return new_pos; | |
| 34 } | |
| 35 | |
| 36 void init_mem_pool_llpos(Mempool_l * mp){ | |
| 37 mp->base = (llpos *) calloc(POOL_SIZE, sizeof(llpos)); | |
| 38 if(mp->base == NULL) terror("Could not request memory pool"); | |
| 39 mp->current = 0; | |
| 40 } | |
| 41 | |
| 42 | |
| 43 void * load_input(void * a){ | |
| 44 | |
| 45 LoadingDBArgs * ldbargs = (LoadingDBArgs *) a; | |
| 46 | |
| 47 // Requires | |
| 48 /* | |
| 49 char * temp_seq_buffer; | |
| 50 SeqInfo * data_database; 1 per thread | |
| 51 uint64_t t_len; | |
| 52 uint64_t word_size; | |
| 53 uint64_t read_from; | |
| 54 uint64_t read_to; | |
| 55 char thread_id; | |
| 56 */ | |
| 57 | |
| 58 uint64_t c_pos; | |
| 59 | |
| 60 unsigned char curr_kmer[custom_kmer]; | |
| 61 unsigned char aux_kmer[custom_kmer+1]; | |
| 62 curr_kmer[0] = '\0'; | |
| 63 uint64_t word_size = 0, pos_in_database = 0; | |
| 64 unsigned char char_converter[91]; | |
| 65 uint64_t curr_seq = 0; | |
| 66 char_converter[(unsigned char)'A'] = 0; | |
| 67 char_converter[(unsigned char)'C'] = 1; | |
| 68 char_converter[(unsigned char)'G'] = 2; | |
| 69 char_converter[(unsigned char)'T'] = 3; | |
| 70 //llpos * aux; | |
| 71 AVLTree * pointer; | |
| 72 | |
| 73 char c; | |
| 74 | |
| 75 /* | |
| 76 if(ldbargs->thread_id == 'A'){ | |
| 77 printf("read to is: %"PRIu64"\n", ldbargs->read_to); | |
| 78 printf("make sure: %c %c %c\n", ldbargs->temp_seq_buffer[ldbargs->read_to-1], ldbargs->temp_seq_buffer[ldbargs->read_to], ldbargs->temp_seq_buffer[ldbargs->read_to+1]); | |
| 79 | |
| 80 uint64_t z = ldbargs->read_to-1; | |
| 81 while(ldbargs->temp_seq_buffer[z] != '>'){ | |
| 82 printf("%c", ldbargs->temp_seq_buffer[z]); | |
| 83 z--; | |
| 84 } | |
| 85 getchar(); | |
| 86 } | |
| 87 if(ldbargs->thread_id == 'C'){ | |
| 88 printf("HELLOOOOOOO im going from %"PRIu64"\n", ldbargs->read_from); | |
| 89 } | |
| 90 */ | |
| 91 | |
| 92 c_pos = ldbargs->read_from; | |
| 93 while(ldbargs->temp_seq_buffer[c_pos] != '>') ++c_pos; | |
| 94 ldbargs->read_from = c_pos; | |
| 95 c_pos = ldbargs->read_to; | |
| 96 while(c_pos < ldbargs->t_len && ldbargs->temp_seq_buffer[c_pos] != '>') ++c_pos; | |
| 97 ldbargs->read_to = c_pos; | |
| 98 | |
| 99 c_pos = ldbargs->read_from; | |
| 100 c = ldbargs->temp_seq_buffer[c_pos]; | |
| 101 | |
| 102 | |
| 103 //printf("thread going from %"PRIu64" to %"PRIu64"\n", ldbargs->read_from, ldbargs->read_to); | |
| 104 | |
| 105 | |
| 106 while(c_pos < ldbargs->read_to){ | |
| 107 | |
| 108 | |
| 109 if(c == '>'){ | |
| 110 | |
| 111 //if(ldbargs->thread_id == 'G') printf("putting in %"PRIu64" @ %"PRIu64"\n", curr_seq, c_pos); | |
| 112 ldbargs->data_database->start_pos[curr_seq] = pos_in_database; ++curr_seq; | |
| 113 | |
| 114 // REalloc sequences and sequence index | |
| 115 if(pos_in_database == READBUF*ldbargs->n_allocs){ | |
| 116 ldbargs->n_allocs++; ldbargs->data_database->sequences = (unsigned char *) realloc(ldbargs->data_database->sequences, READBUF*ldbargs->n_allocs*sizeof(unsigned char)); | |
| 117 if(ldbargs->data_database->sequences == NULL) terror("Could not reallocate temporary database"); | |
| 118 } | |
| 119 | |
| 120 if(curr_seq == INITSEQS*ldbargs->n_allocs){ | |
| 121 ldbargs->n_allocs++; ldbargs->data_database->start_pos = (uint64_t *) realloc(ldbargs->data_database->start_pos, INITSEQS*ldbargs->n_allocs*sizeof(uint64_t)); | |
| 122 } | |
| 123 | |
| 124 | |
| 125 | |
| 126 while(c != '\n'){ c = ldbargs->temp_seq_buffer[c_pos]; ++c_pos; } //Skip ID | |
| 127 | |
| 128 while(c != '>' && c_pos < ldbargs->read_to){ //Until next id | |
| 129 | |
| 130 //if(ldbargs->thread_id == 'A') printf("!!!!!!%"PRIu64" from:%"PRIu64", to %"PRIu64"\n", c_pos, ldbargs->read_from, ldbargs->read_to); | |
| 131 c = ldbargs->temp_seq_buffer[c_pos]; ++c_pos; | |
| 132 c = toupper(c); | |
| 133 if(c == 'A' || c == 'C' || c == 'G' || c == 'T'){ | |
| 134 curr_kmer[word_size] = (unsigned char) c; | |
| 135 if(word_size < custom_kmer) ++word_size; | |
| 136 | |
| 137 ldbargs->data_database->sequences[pos_in_database] = (unsigned char) c; ++pos_in_database; | |
| 138 // REalloc sequences and sequence index | |
| 139 if(pos_in_database == READBUF*ldbargs->n_allocs){ | |
| 140 ldbargs->n_allocs++; ldbargs->data_database->sequences = (unsigned char *) realloc(ldbargs->data_database->sequences, READBUF*ldbargs->n_allocs*sizeof(unsigned char)); | |
| 141 if(ldbargs->data_database->sequences == NULL) terror("Could not reallocate temporary database"); | |
| 142 } | |
| 143 | |
| 144 if(curr_seq == INITSEQS*ldbargs->n_allocs){ | |
| 145 ldbargs->n_allocs++; ldbargs->data_database->start_pos = (uint64_t *) realloc(ldbargs->data_database->start_pos, INITSEQS*ldbargs->n_allocs*sizeof(uint64_t)); | |
| 146 } | |
| 147 }else{ //It can be anything (including N, Y, X ...) | |
| 148 | |
| 149 if(c != '\n' && c != '\r' && c != '>'){ | |
| 150 word_size = 0; | |
| 151 ldbargs->data_database->sequences[pos_in_database] = (unsigned char) 'N'; ++pos_in_database; //Convert to N | |
| 152 // REalloc sequences and sequence index | |
| 153 if(pos_in_database == READBUF*ldbargs->n_allocs){ | |
| 154 ldbargs->n_allocs++; ldbargs->data_database->sequences = (unsigned char *) realloc(ldbargs->data_database->sequences, READBUF*ldbargs->n_allocs*sizeof(unsigned char)); | |
| 155 if(ldbargs->data_database->sequences == NULL) terror("Could not reallocate temporary database"); | |
| 156 } | |
| 157 | |
| 158 if(curr_seq == INITSEQS*ldbargs->n_allocs){ | |
| 159 ldbargs->n_allocs++; ldbargs->data_database->start_pos = (uint64_t *) realloc(ldbargs->data_database->start_pos, INITSEQS*ldbargs->n_allocs*sizeof(uint64_t)); | |
| 160 } | |
| 161 } | |
| 162 } | |
| 163 if(word_size == custom_kmer){ | |
| 164 //write to hash table | |
| 165 | |
| 166 | |
| 167 pointer = &ldbargs->ct->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]] | |
| 168 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]] | |
| 169 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]] | |
| 170 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]]; | |
| 171 | |
| 172 | |
| 173 pointer = insert_AVLTree(pointer, hashOfWord(&curr_kmer[FIXED_K], custom_kmer-FIXED_K), ldbargs->mp_AVL, &ldbargs->n_pools_used_AVL, pos_in_database, ldbargs->mp, &ldbargs->n_pools_used, curr_seq-1); | |
| 174 | |
| 175 | |
| 176 | |
| 177 // CURRENTLY USING OVERLAPPING | |
| 178 | |
| 179 memcpy(aux_kmer, &curr_kmer[1], custom_kmer-1); | |
| 180 memcpy(curr_kmer, aux_kmer, custom_kmer-1); | |
| 181 word_size--; | |
| 182 | |
| 183 // For NON OVERLAPPING ENABLE THIS | |
| 184 //word_size = 0; | |
| 185 } | |
| 186 | |
| 187 } | |
| 188 word_size = 0; | |
| 189 }else{ | |
| 190 c = ldbargs->temp_seq_buffer[c_pos]; ++c_pos; | |
| 191 } | |
| 192 | |
| 193 } | |
| 194 /* | |
| 195 if(ldbargs->thread_id == 'T'){ | |
| 196 uint64_t j; | |
| 197 for(j=0; j < curr_seq-1; j++){ | |
| 198 printf("%"PRIu64" - %"PRIu64"\n", ldbargs->data_database->start_pos[j], ldbargs->data_database->start_pos[j+1]); | |
| 199 } | |
| 200 } | |
| 201 */ | |
| 202 /* | |
| 203 if(ldbargs->thread_id == 'A'){ | |
| 204 printf("\nAT %"PRIu64", and pos_database = %"PRIu64"\n", ldbargs->data_database->start_pos[curr_seq-1], pos_in_database); | |
| 205 uint64_t z = pos_in_database; | |
| 206 while(z > ldbargs->data_database->start_pos[curr_seq-1]){ | |
| 207 printf("%c", ldbargs->data_database->sequences[z]); | |
| 208 z--; | |
| 209 } | |
| 210 | |
| 211 getchar(); | |
| 212 } | |
| 213 */ | |
| 214 | |
| 215 ldbargs->data_database->start_pos[curr_seq] = pos_in_database; | |
| 216 ldbargs->data_database->total_len = pos_in_database; | |
| 217 ldbargs->contained_reads = curr_seq; | |
| 218 ldbargs->data_database->n_seqs = curr_seq; | |
| 219 ldbargs->base_coordinates = pos_in_database; | |
| 220 return NULL; | |
| 221 } | |
| 222 | |
| 223 | |
| 224 void * computeAlignmentsByThread(void * a){ | |
| 225 | |
| 226 /* | |
| 227 typedef struct { | |
| 228 SeqInfo * database; //Database sequence and lengths | |
| 229 SeqInfo * query; //Query sequence and lengths | |
| 230 uint64_t from; //Starting READ to compute alignments from | |
| 231 uint64_t to; //End READ to compute alignments from | |
| 232 Container * container; //Container to hold the multidimensional array | |
| 233 uint64_t accepted_query_reads; //Number of reads that have a fragment with evalue less than specified | |
| 234 long double min_e_value; //Minimum evalue to accept read | |
| 235 } HashTableArgs; | |
| 236 */ | |
| 237 | |
| 238 HashTableArgs * hta = (HashTableArgs *) a; | |
| 239 Queue * my_current_task = NULL; | |
| 240 | |
| 241 unsigned char char_converter[91]; | |
| 242 char_converter[(unsigned char)'A'] = 0; | |
| 243 char_converter[(unsigned char)'C'] = 1; | |
| 244 char_converter[(unsigned char)'G'] = 2; | |
| 245 char_converter[(unsigned char)'T'] = 3; | |
| 246 Quickfrag qf; | |
| 247 int64_t * cell_path_y = (int64_t *) malloc(MAX_READ_SIZE*sizeof(int64_t)); | |
| 248 if(cell_path_y == NULL) terror("Could not allocate cell paths"); | |
| 249 | |
| 250 | |
| 251 Point p0, p1, p2, p3; //Points for NW anchored | |
| 252 p0.x = 0; p0.y = 0; | |
| 253 | |
| 254 AVLContainer * ptr_table_redirect[4]; | |
| 255 ptr_table_redirect[0] = hta->container_a; | |
| 256 ptr_table_redirect[1] = hta->container_b; | |
| 257 ptr_table_redirect[2] = hta->container_c; | |
| 258 ptr_table_redirect[3] = hta->container_d; | |
| 259 unsigned char current_table = 0; | |
| 260 | |
| 261 | |
| 262 | |
| 263 //To keep track of which reads are we reading | |
| 264 uint64_t curr_read, curr_db_seq, xlen, ylen; | |
| 265 uint64_t crrSeqL, pos_of_hit = 0xFFFFFFFFFFFFFFFF; | |
| 266 | |
| 267 //Reading from buffer | |
| 268 char c; | |
| 269 unsigned char curr_kmer[custom_kmer], b_aux[custom_kmer]; | |
| 270 llpos * aux; | |
| 271 AVLTree * pointer; | |
| 272 | |
| 273 // For NW-alignment | |
| 274 int NWaligned; | |
| 275 uint64_t n_hits, alignments_tried; | |
| 276 | |
| 277 BasicAlignment ba; //The resulting alignment from the NW | |
| 278 uint64_t curr_pos = 0; //Reading-head position | |
| 279 uint64_t up_to = 0; | |
| 280 | |
| 281 int64_t last_diagonal = INT64_MIN; // Diagonal to skip repeated hits | |
| 282 unsigned char already_aligned = FALSE; // To not count more times the same read | |
| 283 | |
| 284 | |
| 285 | |
| 286 | |
| 287 //Get next operation in queue | |
| 288 while(NULL != ( my_current_task = get_task_from_queue(hta->queue_head, hta->lock))){ | |
| 289 //Initialize all variables | |
| 290 | |
| 291 qf.x_start = qf.y_start = qf.t_len = 0; | |
| 292 qf.e_value = LDBL_MAX; | |
| 293 last_diagonal = INT64_MIN; | |
| 294 | |
| 295 //Starting from | |
| 296 curr_read = my_current_task->r1; | |
| 297 crrSeqL = 0; pos_of_hit = 0; | |
| 298 | |
| 299 curr_kmer[0] = '\0'; b_aux[0] = '\0'; | |
| 300 aux = NULL; | |
| 301 pointer = NULL; | |
| 302 | |
| 303 NWaligned = 0; | |
| 304 n_hits = 0; | |
| 305 alignments_tried = 0; | |
| 306 | |
| 307 ba.identities = 0; ba.length = 0; ba.igaps = 0xFFFFFFFFFFFFFFFF; ba.egaps = 0xFFFFFFFFFFFFFFFF; | |
| 308 memset(&hta->markers[0], 0, hta->database->n_seqs); // Reset used tags | |
| 309 already_aligned = FALSE; | |
| 310 | |
| 311 //Set current header position at the position of the read start (the ">") | |
| 312 curr_pos = hta->query->start_pos[curr_read]; //Skip the ">" | |
| 313 c = (char) hta->query->sequences[curr_pos]; | |
| 314 | |
| 315 //printf("Im doing from %"PRIu64" to %"PRIu64", nseqs=%"PRIu64"\n", my_current_task->r1, my_current_task->r2, hta->query->n_seqs); | |
| 316 //getchar(); | |
| 317 | |
| 318 while(curr_read < my_current_task->r2 && curr_pos < hta->query->total_len){ | |
| 319 | |
| 320 | |
| 321 | |
| 322 if(curr_read != hta->query->n_seqs) up_to = hta->query->start_pos[curr_read+1]-1; else up_to = hta->query->total_len; | |
| 323 //printf("Currrpos: %"PRIu64" up to: %"PRIu64" on read: %"PRIu64"\n", curr_pos, up_to, curr_read); | |
| 324 | |
| 325 if (curr_pos == up_to) { // Comment, empty or quality (+) line | |
| 326 crrSeqL = 0; // Reset buffered sequence length | |
| 327 #ifdef VERBOSE | |
| 328 printf("Read: %"PRIu64" yielded (%d)\n", curr_read, NWaligned); | |
| 329 #endif | |
| 330 //if(NWaligned == 0){ printf("Read: %"PRIu64" yielded (%d)\n", curr_read, NWaligned);} | |
| 331 //printf("Read: %"PRIu64" yielded (%d)\n", curr_read, NWaligned); if(NWaligned == 0) getchar(); | |
| 332 NWaligned = 0; | |
| 333 //fprintf(stdout, "Seq %"PRIu64" has %"PRIu64" hits and tried to align %"PRIu64" times\n", curr_read, n_hits, alignments_tried); | |
| 334 //fflush(stdout); | |
| 335 n_hits = 0; | |
| 336 already_aligned = FALSE; | |
| 337 alignments_tried = 0; | |
| 338 last_diagonal = INT64_MIN; // This is not perfect but if the diagonal reaches the value then we have an overflow anyway | |
| 339 qf.x_start = 0; | |
| 340 qf.t_len = 0; | |
| 341 | |
| 342 //if(hta->full_comp == TRUE) memset(&hta->markers[my_current_task->r1], 0, my_current_task->r2 - my_current_task->r1 + 1); // Reset used tags | |
| 343 memset(&hta->markers[0], FALSE, hta->database->n_seqs); // Reset used tags | |
| 344 | |
| 345 curr_read++; | |
| 346 //printf("On current read %"PRIu64"\n", curr_read); | |
| 347 continue; | |
| 348 } | |
| 349 | |
| 350 if(c == 'A' || c == 'C' || c == 'T' || c == 'G'){ | |
| 351 curr_kmer[crrSeqL] = (unsigned char) c; | |
| 352 crrSeqL++; | |
| 353 }else{ | |
| 354 crrSeqL = 0; | |
| 355 } | |
| 356 | |
| 357 if (crrSeqL >= custom_kmer) { // Full well formed sequence | |
| 358 | |
| 359 | |
| 360 //printf("comparing hit: %.11s\n", (char *)&curr_kmer[0]); | |
| 361 //getchar(); | |
| 362 | |
| 363 // Choose table | |
| 364 /* | |
| 365 if(curr_kmer[0] == (unsigned char) 'A'){ | |
| 366 ptr_table_redirect = hta->container_A; | |
| 367 }else if(curr_kmer[0] == (unsigned char) 'C'){ | |
| 368 ptr_table_redirect = hta->container_C; | |
| 369 }else if(curr_kmer[0] == (unsigned char) 'G'){ | |
| 370 ptr_table_redirect = hta->container_G; | |
| 371 }else{ | |
| 372 ptr_table_redirect = hta->container_T; | |
| 373 } | |
| 374 */ | |
| 375 | |
| 376 //fprintf(stdout, "%s\n", curr_kmer); | |
| 377 //fflush(stdout); | |
| 378 current_table = 0; | |
| 379 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]] | |
| 380 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]] | |
| 381 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]] | |
| 382 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]]; | |
| 383 | |
| 384 if(pointer == NULL){ | |
| 385 ++current_table; | |
| 386 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]] | |
| 387 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]] | |
| 388 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]] | |
| 389 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]]; | |
| 390 } | |
| 391 if(pointer == NULL){ | |
| 392 ++current_table; | |
| 393 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]] | |
| 394 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]] | |
| 395 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]] | |
| 396 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]]; | |
| 397 } | |
| 398 if(pointer == NULL){ | |
| 399 ++current_table; | |
| 400 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]] | |
| 401 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]] | |
| 402 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]] | |
| 403 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]]; | |
| 404 } | |
| 405 | |
| 406 //While there are hits | |
| 407 //fprintf(stdout, "%p\n", aux); | |
| 408 //fflush(stdout); | |
| 409 | |
| 410 uint64_t hash_forward = hashOfWord(&curr_kmer[FIXED_K], custom_kmer - FIXED_K); | |
| 411 AVLTree * search = find_AVLTree(pointer, hash_forward); | |
| 412 if(search != NULL) aux = search->next; else aux = NULL; | |
| 413 | |
| 414 | |
| 415 while(aux != NULL && ((hta->full_comp == FALSE && NWaligned == 0 && hta->markers[aux->s_id+ hta->contained_reads[current_table]] == 0) || (hta->full_comp && hta->markers[aux->s_id+ hta->contained_reads[current_table]] == 0))){ | |
| 416 | |
| 417 n_hits++; | |
| 418 //fprintf(stdout, "%p\n", aux); | |
| 419 //fflush(stdout); | |
| 420 // ADD OFFSET CUCOOOOOOOOOOOOOOOOOOOOOO!!!!!!!!!!!!!!! | |
| 421 //printf("my current table is %u\n", current_table); | |
| 422 //printf("check this woop: %"PRIu64" - %"PRIu64" - %"PRIu64" - %"PRIu64"\n", aux->s_id, hta->contained_reads[current_table], aux->pos, hta->base_coordinates[current_table]); | |
| 423 curr_db_seq = aux->s_id + hta->contained_reads[current_table]; | |
| 424 pos_of_hit = aux->pos + hta->base_coordinates[current_table]; | |
| 425 //printf("Pos of hit db: %"PRIu64", seq num %"PRIu64", contained reads: %"PRIu64", contained coord %"PRIu64"\n", pos_of_hit, curr_db_seq, hta->contained_reads[current_table], hta->base_coordinates[current_table]); | |
| 426 if(hta->hits != NULL){ | |
| 427 hta->hits[curr_db_seq]++; | |
| 428 goto only_hits; // Count only hits and skip the rest | |
| 429 } | |
| 430 | |
| 431 //fprintf(stdout, "Launching curr_read: %"PRIu64" @ %"PRIu64", vs curr_db_read: %"PRIu64" @ %"PRIu64": ", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
| 432 /* | |
| 433 if(curr_read == 534) fprintf(stdout, "Launching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": ", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
| 434 */ | |
| 435 #ifdef VERBOSE | |
| 436 fprintf(stdout, "Launching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": ", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
| 437 #endif | |
| 438 int64_t curr_diagonal = (int64_t)(curr_pos+1) - (int64_t) pos_of_hit; | |
| 439 | |
| 440 | |
| 441 | |
| 442 if( (last_diagonal != curr_diagonal && !(qf.x_start <= (pos_of_hit + custom_kmer) && pos_of_hit <= (qf.x_start + qf.t_len)))){ | |
| 443 | |
| 444 /* | |
| 445 if(curr_db_seq == hta->database->n_seqs-1){ | |
| 446 xlen = hta->database->total_len - hta->database->start_pos[curr_db_seq]; | |
| 447 }else{ | |
| 448 xlen = hta->database->start_pos[curr_db_seq+1] - hta->database->start_pos[curr_db_seq]; | |
| 449 } | |
| 450 if(curr_read == hta->query->n_seqs-1){ | |
| 451 ylen = hta->query->total_len - hta->query->start_pos[curr_read]; | |
| 452 }else{ | |
| 453 ylen = hta->query->start_pos[curr_read+1] - hta->query->start_pos[curr_read]; | |
| 454 } | |
| 455 */ | |
| 456 | |
| 457 /*if(curr_read == 35){ | |
| 458 fprintf(stdout, "Launching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": \n", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
| 459 printf("what do you think will happen: %"PRIu64"\n", hta->database->start_pos[curr_db_seq]); | |
| 460 //fprintf(stdout, "lengths: x: %"PRIu64", y: %"PRIu64"\n", xlen, ylen); | |
| 461 getchar(); | |
| 462 }*/ | |
| 463 | |
| 464 | |
| 465 //printf("accepted because: \n"); | |
| 466 /*if(curr_read % 100 == 0){ | |
| 467 fprintf(stdout, "Launching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": \n", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
| 468 } | |
| 469 */ | |
| 470 //printf("prev_diag: %"PRId64"- currdiag: %"PRId64"\n", last_diagonal, curr_diagonal); | |
| 471 //printf("\t covers to [%"PRIu64"+%"PRIu64"=%"PRIu64"] [%"PRIu64"] \n", qf.x_start, qf.t_len, qf.x_start+qf.t_len, pos_of_hit); getchar(); | |
| 472 if(hta->markers[curr_db_seq] == FALSE){ | |
| 473 | |
| 474 //if(current_table > 0) getchar(); | |
| 475 alignmentFromQuickHits(hta->database, hta->query, pos_of_hit, curr_pos+1, curr_read, curr_db_seq, &qf, hta->contained_reads[current_table], hta->base_coordinates[current_table]); | |
| 476 last_diagonal = curr_diagonal; | |
| 477 }else{ | |
| 478 qf.e_value = 100000000; | |
| 479 } | |
| 480 | |
| 481 //if(curr_read == 35) printf(" evalue: %Le from %"PRIu64", %"PRIu64" with l: %"PRIu64"\n", qf.e_value, qf.x_start, qf.y_start, qf.t_len); | |
| 482 }else{ | |
| 483 | |
| 484 /*if(curr_read == 35){ | |
| 485 printf("rejected because: \n"); | |
| 486 fprintf(stdout, "UNLaunching %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": \n", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
| 487 printf("prev_diag: %"PRId64"- currdiag: %"PRId64"\n", last_diagonal, curr_diagonal); | |
| 488 printf("\t covers to [%"PRIu64"+%"PRIu64"=%"PRIu64"] [%"PRIu64"] \n", qf.x_start, qf.t_len, qf.x_start+qf.t_len, pos_of_hit); getchar(); | |
| 489 }*/ | |
| 490 | |
| 491 qf.e_value = 100000000; | |
| 492 } | |
| 493 | |
| 494 #ifdef VERBOSE | |
| 495 printf(" evalue: %Le %"PRIu64"\n", qf.e_value, qf.t_len); | |
| 496 #endif | |
| 497 //getchar(); | |
| 498 | |
| 499 | |
| 500 | |
| 501 | |
| 502 | |
| 503 //If e-value of current frag is good, then we compute a good gapped alignment | |
| 504 if(qf.e_value < hta->min_e_value /*&& xlen == 799 && ylen == 2497*/){ | |
| 505 alignments_tried++; | |
| 506 ba.identities = ba.length = ba.igaps = ba.egaps = 0; | |
| 507 //Compute lengths of reads | |
| 508 if(curr_db_seq == hta->database->n_seqs-1){ | |
| 509 xlen = hta->database->total_len - hta->database->start_pos[curr_db_seq]; | |
| 510 }else{ | |
| 511 xlen = hta->database->start_pos[curr_db_seq+1] - hta->database->start_pos[curr_db_seq]; | |
| 512 //printf("!!!\n%"PRIu64", %"PRIu64" :: %"PRIu64"; its db->start_pos[curr_db_seq] db->start_pos[curr_db_seq+1] curr_db_seq\n", hta->database->start_pos[curr_db_seq], hta->database->start_pos[curr_db_seq+1], curr_db_seq); | |
| 513 } | |
| 514 if(curr_read == hta->query->n_seqs-1){ | |
| 515 ylen = hta->query->total_len - hta->query->start_pos[curr_read]; | |
| 516 }else{ | |
| 517 ylen = hta->query->start_pos[curr_read+1] - hta->query->start_pos[curr_read]; | |
| 518 } | |
| 519 //fprintf(stdout, "lengths: x: %"PRIu64", y: %"PRIu64"\n", xlen, ylen); | |
| 520 //Perform alignment plus backtracking | |
| 521 //void build_alignment(char * reconstruct_X, char * reconstruct_Y, uint64_t curr_db_seq, uint64_t curr_read, HashTableArgs * hta, char * my_x, char * my_y, struct cell ** table, struct cell * mc, char * writing_buffer_alignment, BasicAlignment * ba, uint64_t xlen, uint64_t ylen) | |
| 522 if(xlen > MAX_READ_SIZE || ylen > MAX_READ_SIZE){ printf("(%"PRIu64",%"PRIu64")\n", xlen, ylen); terror("Read size reached for gapped alignment."); } | |
| 523 //fprintf(stdout, "R0 %"PRIu64", %"PRIu64"\n", curr_db_seq, curr_read); | |
| 524 | |
| 525 | |
| 526 #ifdef VERBOSE | |
| 527 fprintf(stdout, "qfxs %"PRIu64", dbs %"PRIu64", qfys %"PRIu64" qys %"PRIu64"\n", qf.x_start, hta->database->start_pos[curr_db_seq], qf.y_start, hta->query->start_pos[curr_read]); | |
| 528 #endif | |
| 529 | |
| 530 //fprintf(stdout, "dbFragxs %"PRIu64", dbs %"PRIu64", rFragys %"PRIu64" rys %"PRIu64"\n", qf.x_start, hta->database->start_pos[curr_db_seq], qf.y_start, hta->query->start_pos[curr_read]); | |
| 531 /* | |
| 532 printf("at table: %u\n", current_table); | |
| 533 fprintf(stdout, "Launching curr_read: %"PRIu64" @ %"PRIu64", vs curr_db_read: %"PRIu64" @ %"PRIu64": ", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
| 534 fprintf(stdout, "Launching NW %"PRIu64" @ %"PRIu64", vs %"PRIu64" @ %"PRIu64": \n", curr_read, curr_pos+1, curr_db_seq, pos_of_hit); | |
| 535 printf("have len: %"PRIu64", %"PRIu64"\n", xlen, ylen); | |
| 536 printf("Quickfrag (xs, ys): qf::%"PRIu64", %"PRIu64", tlen:%"PRIu64"\n", qf.x_start, qf.y_start, qf.t_len); | |
| 537 printf("Yea, but start and end of read in db is: %"PRIu64" - %"PRIu64"\n", hta->database->start_pos[curr_db_seq], hta->database->start_pos[curr_db_seq+1]); | |
| 538 */ | |
| 539 | |
| 540 p1.x = qf.x_start - hta->database->start_pos[curr_db_seq]; | |
| 541 //p1.y = qf.y_start - hta->query->start_pos[curr_read]; | |
| 542 p1.y = qf.y_start - (hta->query->start_pos[curr_read] -1); | |
| 543 p2.x = p1.x + qf.t_len; | |
| 544 p2.y = p1.y + qf.t_len; | |
| 545 p3.x = xlen; | |
| 546 p3.y = ylen; | |
| 547 | |
| 548 #ifdef VERBOSE | |
| 549 fprintf(stdout, "p0 (%"PRIu64", %"PRIu64") p1 (%"PRIu64", %"PRIu64") p2 (%"PRIu64", %"PRIu64") p3 (%"PRIu64", %"PRIu64")\n", p0.x, p0.y, p1.x, p1.y, p2.x, p2.y, p3.x, p3.y); | |
| 550 #endif | |
| 551 //fprintf(stdout, "p0 (%"PRIu64", %"PRIu64") p1 (%"PRIu64", %"PRIu64") p2 (%"PRIu64", %"PRIu64") p3 (%"PRIu64", %"PRIu64")\n", p0.x, p0.y, p1.x, p1.y, p2.x, p2.y, p3.x, p3.y); | |
| 552 | |
| 553 calculate_y_cell_path(p0, p1, p2, p3, cell_path_y); | |
| 554 | |
| 555 // REMOVE | |
| 556 /* | |
| 557 uint64_t r1,r2; | |
| 558 for(r1=0;r1<MAX_WINDOW_SIZE;r1++){ | |
| 559 for(r2=0;r2<MAX_WINDOW_SIZE;r2++){ | |
| 560 hta->table[r1][r2].score = INT64_MIN; | |
| 561 } | |
| 562 } | |
| 563 */ | |
| 564 | |
| 565 build_alignment(hta->reconstruct_X, hta->reconstruct_Y, curr_db_seq, curr_read, hta, hta->my_x, hta->my_y, hta->table, hta->mc, hta->writing_buffer_alignment, &ba, xlen, ylen, cell_path_y, &hta->window); | |
| 566 | |
| 567 // Set the read to already aligned so that it does not repeat | |
| 568 hta->markers[curr_db_seq] = 1; | |
| 569 | |
| 570 #ifdef VERBOSE | |
| 571 printf("len 1 %"PRIu64", len 2 %"PRIu64"\n", ba.length, ylen); | |
| 572 printf("ident %"PRIu64"\n", ba.identities); | |
| 573 #endif | |
| 574 | |
| 575 //If is good | |
| 576 if(((long double)(ba.length-(ba.igaps+ba.egaps))/ylen) >= hta->min_coverage && ((long double)ba.identities/(ba.length-(ba.igaps+ba.egaps))) >= hta->min_identity){ | |
| 577 if(already_aligned == FALSE){ | |
| 578 hta->accepted_query_reads++; | |
| 579 already_aligned = TRUE; | |
| 580 //printf("accepted: %"PRIu64"\n", hta->accepted_query_reads); | |
| 581 } | |
| 582 | |
| 583 hta->markers[curr_db_seq] = 1; | |
| 584 if(hta->out != NULL){ | |
| 585 //printf("Last was: (%"PRIu64", %"PRIu64")\n", curr_read, curr_db_seq); | |
| 586 fprintf(hta->out, "(%"PRIu64", %"PRIu64") : %d%% %d%% %"PRIu64"\n $$$$$$$ \n", curr_read, curr_db_seq, MIN(100,(int)(100*(ba.length-(ba.igaps+ba.egaps))/ylen)), MIN(100,(int)((long double)100*ba.identities/(ba.length-(ba.igaps+ba.egaps)))), ylen); | |
| 587 fprintf(hta->out, "%s", hta->writing_buffer_alignment); | |
| 588 //fprintf(stdout, "(%"PRIu64", %"PRIu64") : %d%% %d%% %"PRIu64"\n $$$$$$$ \n", curr_read, curr_db_seq, MIN(100,(int)(100*ba.identities/ba.length)), MIN(100,(int)(100*ba.length/ylen)), ylen); | |
| 589 //fprintf(stdout, "%s", hta->writing_buffer_alignment); | |
| 590 } | |
| 591 NWaligned = 1; | |
| 592 }/*else{ | |
| 593 printf("what: "); | |
| 594 printf("len x %"PRIu64", len y %"PRIu64"\n", xlen, ylen); | |
| 595 printf("ident %"PRIu64" len %"PRIu64"\n", ba.identities, ba.length); getchar(); | |
| 596 }*/ | |
| 597 | |
| 598 } | |
| 599 | |
| 600 //strncpy(get_from_db, &hta->database->sequences[qf.x_start], qf.t_len); | |
| 601 //strncpy(get_from_query, &hta->query->sequences[qf.y_start], qf.t_len); | |
| 602 //fprintf(hta->out, "%s\n%s\n%Le\t%d\n-------------------\n", get_from_db, get_from_query, qf.e_value, (int)(100*qf.coverage)); | |
| 603 //fprintf(hta->out, "%"PRIu64", %"PRIu64", %"PRIu64"\n", qf.x_start, qf.y_start, qf.t_len); | |
| 604 | |
| 605 //printf("Hit comes from %"PRIu64", %"PRIu64"\n", pos_of_hit, curr_pos); | |
| 606 only_hits: | |
| 607 aux = aux->next; | |
| 608 while(aux == NULL && current_table < FIXED_LOADING_THREADS-1){ | |
| 609 ++current_table; | |
| 610 pointer = &ptr_table_redirect[current_table]->root[char_converter[curr_kmer[1]]][char_converter[curr_kmer[2]]][char_converter[curr_kmer[3]]] | |
| 611 [char_converter[curr_kmer[4]]][char_converter[curr_kmer[5]]][char_converter[curr_kmer[6]]] | |
| 612 [char_converter[curr_kmer[7]]][char_converter[curr_kmer[8]]][char_converter[curr_kmer[9]]] | |
| 613 [char_converter[curr_kmer[10]]][char_converter[curr_kmer[11]]]; | |
| 614 hash_forward = hashOfWord(&curr_kmer[FIXED_K], custom_kmer - FIXED_K); | |
| 615 search = find_AVLTree(pointer, hash_forward); | |
| 616 if(search != NULL) aux = search->next; else aux = NULL; | |
| 617 } | |
| 618 //fprintf(stdout, "%p\n", aux); | |
| 619 //fflush(stdout); | |
| 620 } | |
| 621 //printf("SWITCHED\n"); | |
| 622 | |
| 623 if(NWaligned == 1 && hta->full_comp == FALSE){ | |
| 624 if(curr_read < hta->query->n_seqs) curr_pos = hta->query->start_pos[curr_read+1]-2; | |
| 625 }else{ | |
| 626 memcpy(b_aux, curr_kmer, custom_kmer); | |
| 627 memcpy(curr_kmer, &b_aux[1], custom_kmer-1); | |
| 628 crrSeqL -= 1; | |
| 629 } | |
| 630 } | |
| 631 //printf("current pos: %"PRIu64"\n", curr_pos); | |
| 632 curr_pos++; | |
| 633 if(curr_pos < hta->query->total_len) c = (char) hta->query->sequences[curr_pos]; | |
| 634 | |
| 635 | |
| 636 | |
| 637 } | |
| 638 | |
| 639 | |
| 640 | |
| 641 | |
| 642 | |
| 643 } | |
| 644 | |
| 645 | |
| 646 | |
| 647 | |
| 648 | |
| 649 //fprintf(stdout, "Going from %"PRIu64" to %"PRIu64"\n", hta->from, hta->to); | |
| 650 //fflush(stdout); | |
| 651 | |
| 652 free(cell_path_y); | |
| 653 | |
| 654 return NULL; | |
| 655 | |
| 656 } | |
| 657 | |
| 658 void build_alignment(char * reconstruct_X, char * reconstruct_Y, uint64_t curr_db_seq, uint64_t curr_read, HashTableArgs * hta, unsigned char * my_x, unsigned char * my_y, struct cell ** table, struct positioned_cell * mc, char * writing_buffer_alignment, BasicAlignment * ba, uint64_t xlen, uint64_t ylen, int64_t * cell_path_y, long double * window){ | |
| 659 | |
| 660 | |
| 661 //Do some printing of alignments here | |
| 662 uint64_t maximum_len, i, j, curr_pos_buffer, curr_window_size; | |
| 663 | |
| 664 maximum_len = 2*MAX(xlen,ylen); | |
| 665 memcpy(my_x, &hta->database->sequences[hta->database->start_pos[curr_db_seq]], xlen); | |
| 666 memcpy(my_y, &hta->query->sequences[hta->query->start_pos[curr_read]], ylen); | |
| 667 | |
| 668 struct best_cell bc = NW(my_x, 0, xlen, my_y, 0, ylen, (int64_t) hta->igap, (int64_t) hta->egap, table, mc, 0, cell_path_y, window, &curr_window_size); | |
| 669 backtrackingNW(my_x, 0, xlen, my_y, 0, ylen, table, reconstruct_X, reconstruct_Y, &bc, &i, &j, ba, cell_path_y, curr_window_size); | |
| 670 uint64_t offset = 0, before_i = 0, before_j = 0; | |
| 671 i++; j++; | |
| 672 | |
| 673 #ifdef VERBOSE | |
| 674 uint64_t z=0; | |
| 675 for(z=0;z<maximum_len;z++) printf("%c", reconstruct_X[z]); | |
| 676 printf("\n"); | |
| 677 for(z=0;z<maximum_len;z++) printf("%c", reconstruct_Y[z]); | |
| 678 #endif | |
| 679 | |
| 680 curr_pos_buffer = 0; | |
| 681 while(i <= maximum_len && j <= maximum_len){ | |
| 682 offset = 0; | |
| 683 before_i = i; | |
| 684 writing_buffer_alignment[curr_pos_buffer++] = 'D'; | |
| 685 writing_buffer_alignment[curr_pos_buffer++] = '\t'; | |
| 686 while(offset < ALIGN_LEN && i <= maximum_len){ | |
| 687 //fprintf(stdout, "%c", reconstruct_X[i]); | |
| 688 writing_buffer_alignment[curr_pos_buffer++] = (char) reconstruct_X[i]; | |
| 689 i++; | |
| 690 offset++; | |
| 691 } | |
| 692 //fprintf(out, "\n"); | |
| 693 | |
| 694 writing_buffer_alignment[curr_pos_buffer++] = '\n'; | |
| 695 offset = 0; | |
| 696 before_j = j; | |
| 697 | |
| 698 //fprintf(stdout, "\n"); | |
| 699 | |
| 700 writing_buffer_alignment[curr_pos_buffer++] = 'Q'; | |
| 701 writing_buffer_alignment[curr_pos_buffer++] = '\t'; | |
| 702 | |
| 703 while(offset < ALIGN_LEN && j <= maximum_len){ | |
| 704 //fprintf(stdout, "%c", reconstruct_Y[j]); | |
| 705 writing_buffer_alignment[curr_pos_buffer++] = (char) reconstruct_Y[j]; | |
| 706 j++; | |
| 707 offset++; | |
| 708 } | |
| 709 //fprintf(out, "\n"); | |
| 710 writing_buffer_alignment[curr_pos_buffer++] = '\n'; | |
| 711 writing_buffer_alignment[curr_pos_buffer++] = ' '; | |
| 712 writing_buffer_alignment[curr_pos_buffer++] = '\t'; | |
| 713 while(before_i < i){ | |
| 714 if(reconstruct_X[before_i] != '-' && reconstruct_Y[before_j] != '-' && reconstruct_X[before_i] == reconstruct_Y[before_j]){ | |
| 715 //fprintf(out, "*"); | |
| 716 writing_buffer_alignment[curr_pos_buffer++] = '*'; | |
| 717 ba->identities++; | |
| 718 }else{ | |
| 719 //fprintf(out, " "); | |
| 720 writing_buffer_alignment[curr_pos_buffer++] = ' '; | |
| 721 } | |
| 722 before_j++; | |
| 723 before_i++; | |
| 724 } | |
| 725 writing_buffer_alignment[curr_pos_buffer++] = '\n'; | |
| 726 | |
| 727 } | |
| 728 //fprintf(out, "\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n"); | |
| 729 writing_buffer_alignment[curr_pos_buffer++] = '\n'; | |
| 730 writing_buffer_alignment[curr_pos_buffer++] = '\0'; | |
| 731 | |
| 732 | |
| 733 } | |
| 734 | |
| 735 void alignmentFromQuickHits(SeqInfo * database, SeqInfo * query, uint64_t pos_database, uint64_t pos_query, uint64_t curr_read, uint64_t curr_db_seq, Quickfrag * qf, uint64_t offset_db_reads, uint64_t offset_db_coordinates){ | |
| 736 | |
| 737 int64_t read_x_start, read_x_end, read_y_start, read_y_end; | |
| 738 | |
| 739 if(curr_db_seq == database->n_seqs-1){ | |
| 740 read_x_start = database->start_pos[curr_db_seq]; | |
| 741 read_x_end = database->total_len; | |
| 742 }else{ | |
| 743 read_x_start = database->start_pos[curr_db_seq]; | |
| 744 read_x_end = database->start_pos[curr_db_seq+1] - 1; | |
| 745 } | |
| 746 | |
| 747 //printf("read x start -> %"PRId64", end -> %"PRId64" btw: %"PRIu64"\n", read_x_start, read_x_end, database->n_seqs); | |
| 748 | |
| 749 if(curr_read == query->n_seqs-1){ | |
| 750 read_y_start = query->start_pos[curr_read]; | |
| 751 read_y_end = query->total_len; | |
| 752 }else{ | |
| 753 read_y_start = query->start_pos[curr_read]; | |
| 754 read_y_end = query->start_pos[curr_read+1] - 1; | |
| 755 } | |
| 756 | |
| 757 //printf("db_end %"PRId64" query_end %"PRId64"\n", read_x_end, read_y_end); | |
| 758 //printf("pos database: %"PRIu64"\n", pos_database); | |
| 759 int64_t curr_pos_db = (int64_t) pos_database; | |
| 760 int64_t curr_pos_qy = (int64_t) pos_query; | |
| 761 int64_t final_end_x = (int64_t) pos_database - 1, final_start_x = final_end_x - custom_kmer + 1, final_start_y = pos_query - custom_kmer; | |
| 762 int64_t score_right = custom_kmer * POINT; | |
| 763 int64_t score_left = score_right; | |
| 764 int64_t high_left = score_left, high_right = score_right; | |
| 765 qf->t_len = custom_kmer; | |
| 766 uint64_t idents = custom_kmer; | |
| 767 | |
| 768 /* | |
| 769 char le_hit[1000]; | |
| 770 memcpy(le_hit, &database->sequences[final_start_x], FIXED_K); | |
| 771 | |
| 772 fprintf(stdout, "HIT: %s\n", le_hit); | |
| 773 fflush(stdout); | |
| 774 */ | |
| 775 //printf("final start x: %"PRId64"\n", final_start_x); | |
| 776 int keep_going = 1; | |
| 777 | |
| 778 //Forward search | |
| 779 while(keep_going == 1){ | |
| 780 | |
| 781 | |
| 782 if(score_right > 0 && curr_pos_db < database->total_len && curr_pos_qy < query->total_len){ | |
| 783 if(curr_pos_db > read_x_end || curr_pos_qy > read_y_end) break; | |
| 784 //if(database->sequences[curr_pos_db] == query->sequences[curr_pos_qy]){ score_right+=POINT; idents++; }else{ score_right-=POINT;} | |
| 785 if(compare_letters(database->sequences[curr_pos_db], query->sequences[curr_pos_qy]) == POINT){ score_right+=POINT; idents++; }else{ score_right-=POINT;} | |
| 786 if(high_right <= score_right){ | |
| 787 final_end_x = curr_pos_db; | |
| 788 high_right = score_right; | |
| 789 } | |
| 790 curr_pos_db++; | |
| 791 curr_pos_qy++; | |
| 792 }else{ | |
| 793 keep_going = 0; | |
| 794 } | |
| 795 } | |
| 796 | |
| 797 //printf("pos here %"PRIu64" curr_pos_db, curr_pos_query %"PRIu64"\n", curr_pos_db, curr_pos_qy); | |
| 798 //printf("final start x: %"PRId64"\n", final_start_x); | |
| 799 keep_going = 1; | |
| 800 curr_pos_db = pos_database - custom_kmer - 1; | |
| 801 curr_pos_qy = pos_query - custom_kmer - 1; | |
| 802 | |
| 803 score_left = high_right; | |
| 804 | |
| 805 //Backward search | |
| 806 while(keep_going == 1){ | |
| 807 | |
| 808 if(score_left > 0 && curr_pos_db >= 0 && curr_pos_qy >= 0){ | |
| 809 if(curr_pos_db < read_x_start || curr_pos_qy < read_y_start ) break; | |
| 810 //if(database->sequences[curr_pos_db] == query->sequences[curr_pos_qy]){ score_left+=POINT; idents++; }else{ score_left-=POINT;} | |
| 811 if(compare_letters(database->sequences[curr_pos_db], query->sequences[curr_pos_qy]) == POINT){ score_left+=POINT; idents++; }else{ score_left-=POINT;} | |
| 812 if(high_left <= score_left){ | |
| 813 final_start_x = curr_pos_db; | |
| 814 final_start_y = curr_pos_qy; | |
| 815 //printf("got %"PRIu64" when min is %"PRIu64"\n", final_start_y, read_y_start); | |
| 816 high_left = score_left; | |
| 817 } | |
| 818 curr_pos_db--; | |
| 819 curr_pos_qy--; | |
| 820 }else{ | |
| 821 keep_going = 0; | |
| 822 } | |
| 823 } | |
| 824 | |
| 825 qf->t_len = final_end_x - final_start_x; | |
| 826 | |
| 827 /* | |
| 828 char s1[1000]; | |
| 829 char s2[1000]; | |
| 830 | |
| 831 memcpy(s1, &database->sequences[final_start_x], qf->t_len); | |
| 832 memcpy(s2, &query->sequences[final_start_y], qf->t_len); | |
| 833 s1[qf->t_len] = '\0'; | |
| 834 s2[qf->t_len] = '\0'; | |
| 835 | |
| 836 fprintf(stdout, "%s\n%s\n------\n", s1, s2); | |
| 837 fflush(stdout); | |
| 838 | |
| 839 printf("the real hit was:\n"); | |
| 840 memcpy(s1, &database->sequences[pos_database-12+1], 12); | |
| 841 memcpy(s2, &query->sequences[pos_query-12+1], 12); | |
| 842 s1[12] = '\0'; | |
| 843 s2[12] = '\0'; | |
| 844 | |
| 845 fprintf(stdout, "%s\n%s\n------\n", s1, s2); | |
| 846 fflush(stdout); | |
| 847 //getchar(); | |
| 848 printf("%"PRIu64"\n", idents); | |
| 849 */ | |
| 850 | |
| 851 long double rawscore = (idents*POINT) - (qf->t_len - idents)*(POINT); | |
| 852 | |
| 853 long double t_len; | |
| 854 if(curr_read == query->n_seqs-1){ | |
| 855 t_len = (long double) query->total_len - query->start_pos[curr_read]; | |
| 856 }else{ | |
| 857 t_len = (long double) query->start_pos[curr_read+1] - query->start_pos[curr_read]; | |
| 858 } | |
| 859 //printf("final start x: %"PRId64"\n", final_start_x); | |
| 860 qf->x_start = final_start_x; | |
| 861 qf->y_start = final_start_y; | |
| 862 qf->e_value = (long double) QF_KARLIN*t_len*database->total_len*expl(-QF_LAMBDA * rawscore); | |
| 863 qf->coverage = qf->t_len / t_len; | |
| 864 | |
| 865 } | |
| 866 | |
| 867 void calculate_y_cell_path(Point p0, Point p1, Point p2, Point p3, int64_t * y_points){ | |
| 868 | |
| 869 //Calculate lines between points | |
| 870 uint64_t i; | |
| 871 | |
| 872 #ifdef VERBOSE | |
| 873 printf("Built on\n"); | |
| 874 printf("(%"PRIu64", %"PRIu64")\n", p0.x, p0.y); | |
| 875 printf("(%"PRIu64", %"PRIu64")\n", p1.x, p1.y); | |
| 876 printf("(%"PRIu64", %"PRIu64")\n", p2.x, p2.y); | |
| 877 printf("(%"PRIu64", %"PRIu64")\n", p3.x, p3.y); | |
| 878 #endif | |
| 879 | |
| 880 | |
| 881 | |
| 882 if(p0.x > MAX_READ_SIZE){ fprintf(stdout, "LEN error %"PRIu64"\n", p0.x); terror("Reached max length in read for anchoring procedure (1)"); } | |
| 883 if(p1.x > MAX_READ_SIZE){ fprintf(stdout, "LEN error %"PRIu64"\n", p1.x); terror("Reached max length in read for anchoring procedure (2)"); } | |
| 884 if(p2.x > MAX_READ_SIZE){ fprintf(stdout, "LEN error %"PRIu64"\n", p2.x); terror("Reached max length in read for anchoring procedure (3)"); } | |
| 885 if(p3.x > MAX_READ_SIZE){ fprintf(stdout, "LEN error %"PRIu64"\n", p3.x); terror("Reached max length in read for anchoring procedure (4)"); } | |
| 886 | |
| 887 long double deltax, deltay, deltaerr, error; | |
| 888 uint64_t y; | |
| 889 | |
| 890 //P0 to P1 | |
| 891 deltax = p1.x - p0.x; | |
| 892 deltay = p1.y - p0.y; | |
| 893 if(deltax != 0) deltaerr = fabsl(deltay/deltax); else deltaerr = 0; | |
| 894 //printf("Deltas x: %Le y: %Le Error: %Le\n", deltax, deltay, deltaerr); | |
| 895 error = deltaerr - 0.5; | |
| 896 y = p0.y; | |
| 897 | |
| 898 for(i=p0.x;i<p1.x;i++){ | |
| 899 y_points[i] = (int64_t) y; | |
| 900 error = error + deltaerr; | |
| 901 if(error >= 0.5){ | |
| 902 y++; | |
| 903 error = error - 1; | |
| 904 } | |
| 905 } | |
| 906 | |
| 907 //P1 to P2 | |
| 908 | |
| 909 deltax = p2.x - p1.x; | |
| 910 deltay = p2.y - p1.y; | |
| 911 if(deltax != 0) deltaerr = fabsl(deltay/deltax); else deltaerr = 0; | |
| 912 //printf("Deltas x: %Le y: %Le Error: %Le\n", deltax, deltay, deltaerr); | |
| 913 error = deltaerr - 0.5; | |
| 914 y = p1.y; | |
| 915 | |
| 916 for(i=p1.x;i<p2.x;i++){ | |
| 917 y_points[i] = (int64_t) y; | |
| 918 error = error + deltaerr; | |
| 919 if(error >= 0.5){ | |
| 920 y++; | |
| 921 error = error - 1; | |
| 922 } | |
| 923 } | |
| 924 | |
| 925 //P2 to P3 | |
| 926 | |
| 927 deltax = p3.x - p2.x; | |
| 928 deltay = p3.y - p2.y; | |
| 929 if(deltax != 0) deltaerr = fabsl(deltay/deltax); else deltaerr = 0; | |
| 930 //printf("Deltas x: %Le y: %Le Error: %Le\n", deltax, deltay, deltaerr); | |
| 931 error = deltaerr - 0.5; | |
| 932 y = p2.y; | |
| 933 | |
| 934 for(i=p2.x;i<p3.x;i++){ | |
| 935 y_points[i] = (int64_t) y; | |
| 936 error = error + deltaerr; | |
| 937 if(error >= 0.5){ | |
| 938 y++; | |
| 939 error = error - 1; | |
| 940 } | |
| 941 } | |
| 942 | |
| 943 /* | |
| 944 if(p3.x == 799 && p3.y == 2497){ | |
| 945 for(i=0;i<p3.x;i++){ | |
| 946 printf("%"PRIu64": %"PRIu64"\n", i, y_points[i]); | |
| 947 if(i % 50 == 0) getchar(); | |
| 948 } | |
| 949 } | |
| 950 */ | |
| 951 | |
| 952 #ifdef VERBOSE | |
| 953 for(i=0;i<p3.x;i++){ | |
| 954 printf("%"PRIu64" -> ", y_points[i]); | |
| 955 if(i % 50 == 0) getchar(); | |
| 956 } | |
| 957 | |
| 958 #endif | |
| 959 | |
| 960 | |
| 961 | |
| 962 } | |
| 963 | |
| 964 struct best_cell NW(unsigned char * X, uint64_t Xstart, uint64_t Xend, unsigned char * Y, uint64_t Ystart, uint64_t Yend, int64_t iGap, int64_t eGap, struct cell ** table, struct positioned_cell * mc, int show, int64_t * cell_path_y, long double * window, uint64_t * current_window_size){ | |
| 965 | |
| 966 | |
| 967 uint64_t i, j, j_prime; | |
| 968 int64_t scoreDiagonal = INT64_MIN, scoreLeft = INT64_MIN, scoreRight = INT64_MIN, score = INT64_MIN, delta_dif_1_0, delta_dif_2_1, limit_left, limit_right, j_right_prime = 1, j_left_prime = 1, j_diag_prime = 1; | |
| 969 | |
| 970 struct best_cell bc; | |
| 971 bc.c.score = INT64_MIN; | |
| 972 bc.c.xpos = 0; bc.c.ypos = 0; | |
| 973 | |
| 974 //The window size will be a +-15% of the square root of the product of lengths | |
| 975 int64_t window_size = MIN(MAX_WINDOW_SIZE/2, (uint64_t) (*window * sqrtl((long double) Xend * (long double) Yend))); | |
| 976 //printf("xlen: %"PRIu64", ylen: %"PRIu64" w-size: %"PRId64"\n", Xend, Yend, window_size); | |
| 977 *current_window_size = (uint64_t) window_size; | |
| 978 | |
| 979 //The limits to the window | |
| 980 limit_left = 0; | |
| 981 limit_right = 2*window_size + 1; | |
| 982 if(limit_right > MAX_WINDOW_SIZE) limit_right = MAX_WINDOW_SIZE; | |
| 983 | |
| 984 struct positioned_cell mf; | |
| 985 mf.score = INT64_MIN; | |
| 986 | |
| 987 | |
| 988 //First row. iCounter serves as counter from zero | |
| 989 //printf("..0%%"); | |
| 990 //Zero will always be | |
| 991 table[0][0].score = compare_letters(X[0], Y[0]); | |
| 992 mc[0].score = table[0][0].score; | |
| 993 mc[0].xpos = 0; | |
| 994 mc[0].ypos = 0; | |
| 995 | |
| 996 //if(Xend == 799 && Yend == 2497) printf("I am %p The count is real %.5s %.5s %p %p \n", &table[0][0], X, Y, X, Y); | |
| 997 | |
| 998 | |
| 999 for(i=1;i<Yend;i++){ | |
| 1000 //table[0][i].score = (X[0] == Y[i]) ? POINT : -POINT; | |
| 1001 if(i < MAX_WINDOW_SIZE) table[0][i].score = compare_letters(X[0], Y[i]) + iGap + (i-1)*eGap; | |
| 1002 //table[Xstart][i].xfrom = Xstart; | |
| 1003 //table[Xstart][i].yfrom = i; | |
| 1004 //Set every column max | |
| 1005 mc[i].score = compare_letters(X[0], Y[i]) + iGap + (i-1)*eGap; | |
| 1006 #ifdef VERBOSE | |
| 1007 printf("%02"PRId64" ", mc[i].score); | |
| 1008 #endif | |
| 1009 mc[i].xpos = 0; | |
| 1010 mc[i].ypos = i; | |
| 1011 | |
| 1012 } | |
| 1013 #ifdef VERBOSE | |
| 1014 printf("\n"); | |
| 1015 #endif | |
| 1016 //Set row max | |
| 1017 mf.score = table[0][0].score; | |
| 1018 mf.xpos = 0; | |
| 1019 mf.ypos = 0; | |
| 1020 //Init j | |
| 1021 j = MAX(1,(cell_path_y[1] - window_size)); | |
| 1022 | |
| 1023 //Go through full matrix | |
| 1024 for(i=1;i<Xend;i++){ | |
| 1025 //Fill first rowcell | |
| 1026 if(cell_path_y[i-1]+window_size < cell_path_y[i]) return bc; //terror("Sequence proportions make window shift too large"); | |
| 1027 //Conversion for the j-coordinate | |
| 1028 j_prime = 1; | |
| 1029 | |
| 1030 //table[i][0].score = (X[i] == Y[0]) ? POINT : -POINT; | |
| 1031 if(cell_path_y[i] - window_size <= 0){ | |
| 1032 table[i][0].score = compare_letters(X[i], Y[0]) + iGap + (i-1)*eGap; | |
| 1033 mf.score = table[i][0].score; | |
| 1034 }else{ | |
| 1035 mf.score = compare_letters(X[i], Y[0]) + iGap + (i-1)*eGap; | |
| 1036 } | |
| 1037 | |
| 1038 mf.xpos = i-1; | |
| 1039 mf.ypos = 0; | |
| 1040 | |
| 1041 delta_dif_1_0 = MAX(1, (cell_path_y[i] - window_size)) - MAX(1,(cell_path_y[i-1] - window_size)); //j-1 | |
| 1042 if(i>1) delta_dif_2_1 = MAX(1, (cell_path_y[i-1] - window_size)) - MAX(1, (cell_path_y[i-2] - window_size)); //j-2 | |
| 1043 | |
| 1044 #ifdef VERBOSE | |
| 1045 printf("D1_0: %"PRId64" D2_1: %"PRId64"\n", delta_dif_1_0, delta_dif_2_1); | |
| 1046 #endif | |
| 1047 | |
| 1048 #ifdef VERBOSE | |
| 1049 printf("%02"PRId64" ", mf.score); | |
| 1050 #endif | |
| 1051 //printf("Check on i: (%"PRIu64") from - to (%"PRIu64", %"PRIu64")\n", i, 0L, Xend); | |
| 1052 /* | |
| 1053 if(1||i==262){ | |
| 1054 printf("I will go from %"PRIu64" to %"PRIu64" and I am %"PRIu64", %"PRIu64"\n", (uint64_t) MAX(1,(cell_path_y[i] - (int64_t)window_size)), (uint64_t) MIN((int64_t)Yend,(cell_path_y[i] + (int64_t)window_size)), i, j); | |
| 1055 //printf("lengs: %"PRIu64", %"PRIu64"\n", Xend, Yend); | |
| 1056 //printf("cp[i]: %"PRId64", cp[i-1] %"PRId64"\n", cell_path_y[i], cell_path_y[i-1]); | |
| 1057 //printf("min(%"PRId64", %"PRId64" + %"PRId64")-------------------\n", Yend, cell_path_y[i] ,(int64_t)window_size); | |
| 1058 | |
| 1059 } | |
| 1060 */ | |
| 1061 //getchar(); | |
| 1062 | |
| 1063 //printf("@%"PRIu64"[%"PRId64"] -> (%"PRIu64", %"PRIu64") jp %"PRIu64", lright %"PRIu64"\n", i, cell_path_y[i], MAX(1,(cell_path_y[i] - window_size)), MIN(Yend,(cell_path_y[i] + window_size)), j_prime, limit_right); | |
| 1064 //printf("M:@%"PRIu64"-> %"PRIu64"\n", i, MIN(Yend,(cell_path_y[i] + window_size))); | |
| 1065 #ifdef VERBOSE | |
| 1066 int64_t r; | |
| 1067 for(r=0;r<MAX(0,(cell_path_y[i] - window_size)); r++){ | |
| 1068 printf(" "); | |
| 1069 } | |
| 1070 #endif | |
| 1071 | |
| 1072 /* | |
| 1073 if(Xend == 799 && Yend == 2497 && i >= 145 && i <= 155){ | |
| 1074 printf("them limits @i %"PRIu64"::: %"PRIu64", %"PRIu64"\n", i, MAX(1,(cell_path_y[i] - window_size)), MIN(Yend,(cell_path_y[i] + window_size))); | |
| 1075 getchar(); | |
| 1076 } | |
| 1077 */ | |
| 1078 | |
| 1079 | |
| 1080 for(j=MAX(1,(cell_path_y[i] - window_size));j<MIN(Yend,(cell_path_y[i] + window_size)) && j_prime < limit_right;j++){ | |
| 1081 //if(i == 8302){ printf("Doing on : (%"PRIu64",%"PRIu64" and jprime=%"PRIu64"\n", i,j,j_prime); getchar(); } | |
| 1082 //Check if max in row has changed | |
| 1083 //if(j > MAX(1, cell_path_y[i-1] - window_size +1) && mf.score <= table[i][j-2].score){ | |
| 1084 //if(j_prime == MAX_WINDOW_SIZE) break; | |
| 1085 //Calculate the real j position in the windowed table | |
| 1086 | |
| 1087 j_left_prime = ((int64_t)j_prime - (2 - delta_dif_1_0)); | |
| 1088 //j_diag_prime = ((int64_t)j_prime - (1 - delta_dif_1_0)); | |
| 1089 j_diag_prime = ((int64_t)j_prime - (1 - delta_dif_1_0)); | |
| 1090 if(i > 1){ | |
| 1091 j_right_prime = ((int64_t)j_prime - (1 - (delta_dif_1_0 + delta_dif_2_1))); | |
| 1092 } | |
| 1093 | |
| 1094 if(j > MAX(1, cell_path_y[i-1] - window_size +1) && j < MIN(Yend,(cell_path_y[i-1] + window_size)) && j_left_prime < limit_right && table[i-1][j_left_prime].score >= mf.score){ | |
| 1095 //mf.score = table[i-1][j-2].score; | |
| 1096 mf.score = table[i-1][j_left_prime].score; | |
| 1097 mf.xpos = i-1; | |
| 1098 mf.ypos = j-2; | |
| 1099 if(table[i-1][j_left_prime].score == INT64_MIN){ printf("A: mf.x\t%"PRIu64"\tmf.y\t%"PRIu64"\ts%"PRId64"\n", mf.xpos, mf.ypos, mf.score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); } | |
| 1100 | |
| 1101 } | |
| 1102 //printf("RowMax: %"PRId64"@(%"PRIu64", %"PRIu64")\t", mf.score, mf.xpos, mf.ypos); | |
| 1103 | |
| 1104 //score = (X[i] == Y[j]) ? POINT : -POINT; | |
| 1105 score = compare_letters(X[i], Y[j]); | |
| 1106 | |
| 1107 //Precondition: Upper row needs to reach up to diagonal | |
| 1108 //if((cell_path_y[i-1]+window_size) >= j-1){ | |
| 1109 if(i > 1 && j >= 1 && j-1 >= MAX(1,(cell_path_y[i-2] - window_size)) && j-1 < MIN(Yend,(cell_path_y[i-2] + window_size)) && j_right_prime >= limit_left && j_right_prime < limit_right && table[i-2][j_right_prime].score >= mc[j-1].score ){ | |
| 1110 //mc[j-1].score = table[i-2][j-(1+j_prime)].score; | |
| 1111 //Should be the j_prime we had at cell_path_y | |
| 1112 //MAX(1,(cell_path_y[i] - window_size));j<MIN(Yend,(cell_path_y[i] + window_size)) | |
| 1113 | |
| 1114 mc[j-1].score = table[i-2][j_right_prime].score; | |
| 1115 mc[j-1].xpos = i-2; | |
| 1116 mc[j-1].ypos = j-1; | |
| 1117 | |
| 1118 if(table[i-2][j_right_prime].score == INT64_MIN){ printf("A: j-1\t%"PRIu64"\tmc.xpos\t%"PRIu64"\ts%"PRId64"\n", j-1, mc[j-1].xpos, mc[j-1].score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); } | |
| 1119 | |
| 1120 } | |
| 1121 | |
| 1122 | |
| 1123 if(j-1 >= MAX(0, (cell_path_y[i-1]-window_size)) && (cell_path_y[i-1]+window_size) >= j-1 && j_diag_prime >= limit_left && j_diag_prime < limit_right && j_diag_prime < cell_path_y[i-1]+window_size){ | |
| 1124 //scoreDiagonal = table[i-1][j-1].score + score; | |
| 1125 //printf("prevdiag: %"PRId64"\n", table[i-1][j_diag_prime].score); | |
| 1126 scoreDiagonal = table[i-1][j_diag_prime].score + score; | |
| 1127 if(table[i-1][j_diag_prime].score == INT64_MIN){ printf("A: i-1\t%"PRIu64"\tj_diag\t%"PRIu64"\ts%"PRId64"\n", i-1, j_diag_prime, table[i-1][j_diag_prime].score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); } | |
| 1128 }else{ | |
| 1129 scoreDiagonal = INT64_MIN; | |
| 1130 } | |
| 1131 | |
| 1132 if(i>=1 && j>1){ | |
| 1133 scoreLeft = mf.score + iGap + (j - (mf.ypos+2))*eGap + score; | |
| 1134 | |
| 1135 if(mf.score == INT64_MIN){ printf("A: mf.x\t%"PRIu64"\tmf.y\t%"PRIu64"\ts%"PRId64"\n", mf.xpos, mf.ypos, mf.score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); } | |
| 1136 }else{ | |
| 1137 scoreLeft = INT64_MIN; | |
| 1138 } | |
| 1139 | |
| 1140 if(j>=1 && i>1){ | |
| 1141 scoreRight = mc[j-1].score + iGap + (i - (mc[j-1].xpos+2))*eGap + score; | |
| 1142 //if(scoreRight == -12) printf("MC: %"PRId64", From: %"PRIu64", %"PRIu64"->", mc[j-1].score, mc[j-1].xpos, mc[j-1].ypos); | |
| 1143 | |
| 1144 if(mc[j-1].score == INT64_MIN){ printf("A: j-1\t%"PRIu64"\tmc.xpos\t%"PRIu64"\ts%"PRId64"\n", j-1, mc[j-1].xpos, mc[j-1].score); printf("@[%"PRIu64", %"PRIu64"] with j_prime: %"PRIu64", wsize: %"PRIu64", cp[i-1]=%"PRId64", cp[i]=%"PRId64"\n", i, j, j_prime, 2*window_size, cell_path_y[i-1], cell_path_y[i]); getchar(); } | |
| 1145 }else{ | |
| 1146 scoreRight = INT64_MIN; | |
| 1147 } | |
| 1148 | |
| 1149 /* | |
| 1150 if(Xend == 799 && Yend == 2497 && i >= 152 && i == 153){ | |
| 1151 printf("@%"PRIu64", %"PRIu64" -> scores: %"PRId64", %"PRId64", %"PRId64"\n", i, j, scoreDiagonal, scoreRight, scoreLeft); | |
| 1152 printf("in position @ jprime= %"PRIu64" cellpaths [i-2, i-1, i] are %"PRId64", %"PRId64", %"PRId64", window_size: %"PRId64", j_diag_prime: %"PRId64"\n", j_prime, cell_path_y[i-2], cell_path_y[i-1], cell_path_y[i], window_size, j_diag_prime); | |
| 1153 printf("Mfs from scoreLeft: mf.x\t%"PRIu64"\tmf.y\t%"PRIu64"\ts%"PRId64"\n", mf.xpos, mf.ypos, mf.score); | |
| 1154 getchar(); | |
| 1155 } | |
| 1156 */ | |
| 1157 | |
| 1158 //Choose maximum | |
| 1159 /* | |
| 1160 #ifdef VERBOSE | |
| 1161 printf("The game starts at %"PRId64"\n", MAX(0, cell_path_y[i] - window_size)); | |
| 1162 printf("from %c %c and I get to %"PRIu64" while j=%"PRIu64"\n", X[i], Y[j], j_prime, j); | |
| 1163 printf("j_prime: %"PRId64"\n", j_prime); | |
| 1164 printf("j_diag_prime: %"PRId64" limits[%"PRId64", %"PRId64"]\n", j_diag_prime, limit_left, limit_right); | |
| 1165 printf("Score DIAG: %"PRId64"; LEFT: %"PRId64"; RIGHT: %"PRId64"\n", scoreDiagonal, scoreLeft, scoreRight); | |
| 1166 printf("currmf: %"PRId64" mc: %"PRId64"\n", mf.score, mc[j-1].score); | |
| 1167 #endif | |
| 1168 */ | |
| 1169 | |
| 1170 | |
| 1171 //if(i >= MAX_READ_SIZE){ printf("i=%"PRIu64"\n", i); terror("i overflowed\n");} | |
| 1172 //if(j_prime >= MAX_WINDOW_SIZE){ printf("upper : %"PRId64"\n", MIN(Yend,(cell_path_y[i] + window_size-1))); printf("jp=%"PRIu64"\n", j_prime); terror("j overflowed\n"); } | |
| 1173 | |
| 1174 | |
| 1175 | |
| 1176 if(scoreDiagonal >= scoreLeft && scoreDiagonal >= scoreRight){ | |
| 1177 //Diagonal | |
| 1178 | |
| 1179 //fprintf(stdout, "The JPRIME: %"PRId64" actual pos: %"PRIu64"\n", j_prime, j); getchar(); | |
| 1180 table[i][j_prime].score = scoreDiagonal; | |
| 1181 table[i][j_prime].xfrom = i-1; | |
| 1182 table[i][j_prime].yfrom = j-1; | |
| 1183 | |
| 1184 | |
| 1185 }else if(scoreRight > scoreLeft){ | |
| 1186 table[i][j_prime].score = scoreRight; | |
| 1187 table[i][j_prime].xfrom = mc[j-1].xpos; | |
| 1188 table[i][j_prime].yfrom = mc[j-1].ypos; | |
| 1189 | |
| 1190 }else{ | |
| 1191 //printf("Scores %"PRId64", %"PRId64", %"PRId64"\n", scoreDiagonal, scoreLeft, scoreRight); | |
| 1192 table[i][j_prime].score = scoreLeft; | |
| 1193 table[i][j_prime].xfrom = mf.xpos; | |
| 1194 table[i][j_prime].yfrom = mf.ypos; | |
| 1195 } | |
| 1196 //printf("F: i\t%"PRIu64"\tj_prime\t%"PRIu64"\n", i, j_prime); | |
| 1197 //getchar(); | |
| 1198 //if(i == 94){ printf("showing j %"PRIu64" jprime %"PRIu64" lleft %"PRIu64", llright %"PRIu64"\n", j, j_prime, limit_left, limit_right); getchar(); } | |
| 1199 //if(i == 94 && j == 374){ printf("stopped at 94, 374 s %"PRId64"\n", table[i][j_prime].score); getchar(); } | |
| 1200 | |
| 1201 | |
| 1202 | |
| 1203 | |
| 1204 /* | |
| 1205 if(i == 264 && j == 176){ | |
| 1206 printf("@%"PRIu64", %"PRIu64"\n", i, j); | |
| 1207 printf("my score is %"PRId64"\n", mc[j-1].score); | |
| 1208 printf("in position @ jprime= %"PRIu64" cellpaths [i-1, i] are %"PRId64", %"PRId64"\n", j_prime, cell_path_y[i-1], cell_path_y[i]); | |
| 1209 printf("Scores %"PRId64", %"PRId64", %"PRId64"\n", scoreDiagonal, scoreLeft, scoreRight); | |
| 1210 printf("check j_right_prime == %"PRIu64"\n", j_right_prime); | |
| 1211 getchar(); | |
| 1212 //exit(-1); | |
| 1213 } | |
| 1214 */ | |
| 1215 | |
| 1216 //check if column max has changed | |
| 1217 //New condition: check if you filled i-2, j-1 | |
| 1218 | |
| 1219 | |
| 1220 if(i == Xend-1 || j == Yend-1){ | |
| 1221 | |
| 1222 if(i == Xend-1 && j != Yend-1){ | |
| 1223 table[i][j_prime].score = table[i][j_prime].score + iGap + (Yend - j)*eGap; | |
| 1224 }else if(j == Yend-1 && i != Xend-1){ | |
| 1225 table[i][j_prime].score = table[i][j_prime].score + iGap + (Xend - i)*eGap; | |
| 1226 } | |
| 1227 //Check for best cell | |
| 1228 if(table[i][j_prime].score >= bc.c.score){ | |
| 1229 | |
| 1230 /* | |
| 1231 if(i == 798 && j == 1052){ // yields 799, 2497 | |
| 1232 printf("in position @ jprime= %"PRIu64" cellpaths [i-1, i] are %"PRId64", %"PRId64"\n", j_prime, cell_path_y[i-1], cell_path_y[i]); | |
| 1233 printf("Scores %"PRId64", %"PRId64", %"PRId64"\n", scoreDiagonal, scoreLeft, scoreRight); | |
| 1234 printf("score comes from %"PRIu64", %"PRIu64",\n", mc[j-1].xpos, mc[j-1].ypos); | |
| 1235 printf("IDlengths: %"PRIu64", %"PRIu64"\n", Xend, Yend); | |
| 1236 | |
| 1237 //exit(-1); | |
| 1238 } | |
| 1239 */ | |
| 1240 | |
| 1241 bc.c.score = table[i][j_prime].score; bc.c.xpos = i; bc.c.ypos = j; bc.j_prime = j_prime; | |
| 1242 } | |
| 1243 //bc.c.score = table[i][j_prime].score; bc.c.xpos = i; bc.c.ypos = j; bc.j_prime = j_prime; | |
| 1244 } | |
| 1245 | |
| 1246 | |
| 1247 #ifdef VERBOSE | |
| 1248 //printf("Put score: %"PRId64"\n\n", table[i][j_prime].score); | |
| 1249 //printf("(%"PRId64")%02"PRId64" ", j_diag_prime, table[i][j_prime].score); //printf("->(%"PRIu64", %"PRIu64")", i, j); printf("[%c %c]", X[i], Y[j]); | |
| 1250 //if(scoreDiagonal >= scoreLeft && scoreDiagonal >= scoreRight) printf("*\t"); | |
| 1251 //else if(scoreRight > scoreLeft) printf("{\t"); else printf("}\t"); | |
| 1252 //getchar(); | |
| 1253 | |
| 1254 #endif | |
| 1255 j_prime++; | |
| 1256 } | |
| 1257 #ifdef VERBOSE | |
| 1258 printf("\n"); | |
| 1259 getchar(); | |
| 1260 #endif | |
| 1261 } | |
| 1262 | |
| 1263 return bc; | |
| 1264 } | |
| 1265 | |
| 1266 | |
| 1267 | |
| 1268 void backtrackingNW(unsigned char * X, uint64_t Xstart, uint64_t Xend, unsigned char * Y, uint64_t Ystart, uint64_t Yend, struct cell ** table, char * rec_X, char * rec_Y, struct best_cell * bc, uint64_t * ret_head_x, uint64_t * ret_head_y, BasicAlignment * ba, int64_t * cell_path_y, uint64_t window_size){ | |
| 1269 uint64_t curr_x, curr_y, prev_x, prev_y, head_x, head_y, limit_x, limit_y; | |
| 1270 int64_t k, j_prime, delta_diff = 0; | |
| 1271 | |
| 1272 //limit_x = 2*MAX_READ_SIZE-1; | |
| 1273 //limit_y = limit_x; | |
| 1274 limit_x = 2*MAX(Xend, Yend); | |
| 1275 limit_y = limit_x; | |
| 1276 //head_x = 2*MAX(Xend, Yend); | |
| 1277 //head_y = 2*MAX(Xend, Yend); | |
| 1278 head_x = limit_x; | |
| 1279 head_y = limit_y; | |
| 1280 curr_x = bc->c.xpos; | |
| 1281 curr_y = bc->c.ypos; | |
| 1282 #ifdef VERBOSE | |
| 1283 printf("Optimum : %"PRIu64", %"PRIu64"\n", curr_x, curr_y); | |
| 1284 #endif | |
| 1285 //printf("Optimum : %"PRIu64", %"PRIu64"\n", curr_x, curr_y); | |
| 1286 | |
| 1287 prev_x = curr_x; | |
| 1288 prev_y = curr_y; | |
| 1289 int show = 0; | |
| 1290 | |
| 1291 for(k=Xend-1; k>curr_x; k--) rec_X[head_x--] = '-'; | |
| 1292 for(k=Yend-1; k>curr_y; k--) rec_Y[head_y--] = '-'; | |
| 1293 | |
| 1294 j_prime = bc->j_prime; | |
| 1295 //printf("init prime: %"PRIu64"\n", j_prime); | |
| 1296 unsigned char first_track = 1; | |
| 1297 | |
| 1298 while(curr_x > 0 && curr_y > 0){ | |
| 1299 | |
| 1300 | |
| 1301 if(first_track == 0){ | |
| 1302 delta_diff = MAX(1, cell_path_y[prev_x] - (int64_t) window_size) - MAX(1, cell_path_y[curr_x] - (int64_t)window_size); //j-1 | |
| 1303 j_prime = MAX(0, j_prime - (int64_t)(prev_y - curr_y) + (int64_t) delta_diff); | |
| 1304 | |
| 1305 | |
| 1306 if(/*(bc->c.xpos == 630 && bc->c.ypos == 541 )||*/ j_prime > MAX_WINDOW_SIZE){ | |
| 1307 | |
| 1308 printf("from %"PRIu64", %"PRIu64"\nto %"PRIu64", %"PRIu64"\n", prev_x, prev_y, curr_x, curr_y); | |
| 1309 printf("jp: %"PRIu64", py,cy %"PRIu64", %"PRIu64", delta: %"PRId64"\n", j_prime, prev_y, curr_y, (int64_t)delta_diff); | |
| 1310 printf("currx curry : %"PRIu64", %"PRIu64"\n", curr_x, curr_y); | |
| 1311 printf("window size: %"PRIu64"\n", window_size); | |
| 1312 printf("cp[prev, curr] : %"PRId64", %"PRId64"\n", cell_path_y[prev_x], cell_path_y[curr_x]); | |
| 1313 printf("my cell path: %"PRId64"\n", cell_path_y[curr_x]); | |
| 1314 printf("Optimum : %"PRIu64", %"PRIu64"\n", bc->c.xpos, bc->c.ypos); | |
| 1315 getchar(); | |
| 1316 } | |
| 1317 | |
| 1318 //j_prime = j_prime - (int64_t)(prev_y - curr_y) + (int64_t) delta_diff; | |
| 1319 | |
| 1320 prev_x = curr_x; | |
| 1321 prev_y = curr_y; | |
| 1322 | |
| 1323 /* | |
| 1324 if(bc->c.xpos == 798 && bc->c.ypos == 1052){ | |
| 1325 printf("[%c %c]", X[prev_x], Y[prev_y]); | |
| 1326 printf("(%"PRIu64", %"PRIu64") ::: \n", curr_x, curr_y); | |
| 1327 | |
| 1328 } | |
| 1329 */ | |
| 1330 | |
| 1331 #ifdef VERBOSE | |
| 1332 //printf("Jprime: %"PRId64" :DELTADIF:%"PRId64"\n", j_prime, delta_diff); | |
| 1333 printf("[%c %c]", X[prev_x], Y[prev_y]); | |
| 1334 printf("(%"PRIu64", %"PRIu64") ::: \n", curr_x, curr_y); | |
| 1335 //printf("(%"PRIu64", %"PRIu64") ::: \n", prev_x, prev_y); | |
| 1336 //printf("cellp Prev: %"PRId64" Post: %"PRId64"\n", cell_path_y[prev_x], cell_path_y[curr_x]); | |
| 1337 //printf("the difs? %"PRId64" the other: %"PRId64"\n", MAX(0, cell_path_y[prev_x] - (int64_t) window_size), MAX(0, cell_path_y[curr_x] - (int64_t)window_size)); | |
| 1338 getchar(); | |
| 1339 #endif | |
| 1340 | |
| 1341 } | |
| 1342 | |
| 1343 //if(table[prev_x][j_prime].xfrom > MAX_READ_SIZE || table[prev_x][j_prime].yfrom > MAX_WINDOW_SIZE) fprintf(stdout, "OH NOES !! %"PRIu64"\t%"PRId64"\t%"PRIu64"\t%"PRIu64" dangers: %"PRIu64", %"PRIu64"\n", prev_x, j_prime, Xend, Yend, table[prev_x][j_prime].xfrom, table[prev_x][j_prime].yfrom); | |
| 1344 | |
| 1345 /* | |
| 1346 if(table[prev_x][j_prime].xfrom > MAX_READ_SIZE || table[prev_x][j_prime].yfrom > MAX_WINDOW_SIZE){ | |
| 1347 fprintf(stdout, "OH NOES !! %"PRIu64"\t%"PRId64"\t%"PRIu64"\t%"PRIu64" dangers: %"PRIu64", %"PRIu64"\n", prev_x, j_prime, Xend, Yend, table[prev_x][j_prime].xfrom, table[prev_x][j_prime].yfrom); | |
| 1348 uint64_t k; | |
| 1349 for(k=0;k<Xend;k++){ | |
| 1350 fprintf(stdout, "%c", X[k]); | |
| 1351 } | |
| 1352 fprintf(stdout, "\n"); | |
| 1353 for(k=0;k<Yend;k++){ | |
| 1354 fprintf(stdout, "%c", Y[k]); | |
| 1355 } | |
| 1356 fprintf(stdout, "\n"); | |
| 1357 show = 1; | |
| 1358 } | |
| 1359 */ | |
| 1360 if(j_prime >= MAX_WINDOW_SIZE) printf("j_prime:overflow %"PRIu64"\n", j_prime); | |
| 1361 | |
| 1362 | |
| 1363 curr_x = table[prev_x][j_prime].xfrom; | |
| 1364 curr_y = table[prev_x][j_prime].yfrom; | |
| 1365 first_track = 0; | |
| 1366 | |
| 1367 //printf("w: %"PRIu64"- %"PRIu64"\n", curr_x, curr_y); | |
| 1368 | |
| 1369 | |
| 1370 if((curr_x == (prev_x - 1)) && (curr_y == (prev_y -1))){ | |
| 1371 //Diagonal case | |
| 1372 //printf("DIAG\n"); | |
| 1373 if(head_x == 0 || head_y == 0) goto exit_point; | |
| 1374 rec_X[head_x--] = (char) X[prev_x]; | |
| 1375 rec_Y[head_y--] = (char) Y[prev_y]; | |
| 1376 ba->length++; | |
| 1377 | |
| 1378 }else if((prev_x - curr_x) > (prev_y - curr_y)){ | |
| 1379 //Gap in X | |
| 1380 //printf("Gap X\n"); | |
| 1381 if(head_x == 0 || head_y == 0) goto exit_point; | |
| 1382 if(bc->c.xpos != prev_x && bc->c.ypos != prev_y){ | |
| 1383 rec_Y[head_y--] = Y[prev_y]; | |
| 1384 rec_X[head_x--] = X[prev_x]; | |
| 1385 }else{ | |
| 1386 rec_Y[head_y--] = '-'; | |
| 1387 rec_X[head_x--] = X[prev_x]; | |
| 1388 } | |
| 1389 ba->length++; | |
| 1390 | |
| 1391 for(k=prev_x-1;k>curr_x;k--){ | |
| 1392 if(head_x == 0 || head_y == 0) goto exit_point; | |
| 1393 #ifdef VERBOSE | |
| 1394 if(head_x == 0 || head_y == 0){ | |
| 1395 printf("%"PRIu64" %"PRIu64" and prevs are %"PRIu64" %"PRIu64"\n", head_x, head_y, prev_x, prev_y); | |
| 1396 printf("origin is %"PRIu64", %"PRIu64"\n", bc->c.xpos, bc->c.ypos); | |
| 1397 uint64_t z; | |
| 1398 for(z=head_x;z<limit_x;z++){ | |
| 1399 fprintf(stdout, "%c", (char) rec_X[z]); | |
| 1400 } | |
| 1401 printf("\n"); | |
| 1402 for(z=head_y;z<limit_y;z++){ | |
| 1403 fprintf(stdout, "%c", (char) rec_Y[z]); | |
| 1404 } | |
| 1405 getchar(); | |
| 1406 } | |
| 1407 #endif | |
| 1408 rec_Y[head_y--] = '-'; | |
| 1409 rec_X[head_x--] = (char) X[k]; | |
| 1410 ba->length++; | |
| 1411 ba->egaps++; | |
| 1412 } | |
| 1413 ba->igaps += 1; | |
| 1414 ba->egaps--; | |
| 1415 }else{ | |
| 1416 //Gap in Y | |
| 1417 //printf("GAP Y\n"); | |
| 1418 //10, 0, 401, 281 | |
| 1419 if(head_x == 0 || head_y == 0) goto exit_point; | |
| 1420 if(bc->c.xpos != prev_x && bc->c.ypos != prev_y){ | |
| 1421 rec_Y[head_y--] = Y[prev_y]; | |
| 1422 rec_X[head_x--] = X[prev_x]; | |
| 1423 }else{ | |
| 1424 rec_Y[head_y--] = Y[prev_y]; | |
| 1425 rec_X[head_x--] = '-'; | |
| 1426 } | |
| 1427 ba->length++; | |
| 1428 | |
| 1429 for(k=prev_y-1;k>curr_y;k--){ | |
| 1430 if(head_x == 0 || head_y == 0) goto exit_point; | |
| 1431 #ifdef VERBOSE | |
| 1432 if(head_x == 0 || head_y == 0){ | |
| 1433 printf("%"PRIu64" %"PRIu64" and prevs are %"PRIu64" %"PRIu64"\n", head_x, head_y, prev_x, prev_y); | |
| 1434 printf("origin is %"PRIu64", %"PRIu64"\n", bc->c.xpos, bc->c.ypos); | |
| 1435 uint64_t z; | |
| 1436 for(z=head_x;z<limit_x;z++){ | |
| 1437 fprintf(stdout, "%c", (char) rec_X[z]); | |
| 1438 } | |
| 1439 printf("\n"); | |
| 1440 for(z=head_y;z<limit_y;z++){ | |
| 1441 fprintf(stdout, "%c", (char) rec_Y[z]); | |
| 1442 } | |
| 1443 getchar(); | |
| 1444 } | |
| 1445 #endif | |
| 1446 rec_X[head_x--] = '-'; | |
| 1447 rec_Y[head_y--] = (char) Y[k]; | |
| 1448 ba->length++; | |
| 1449 ba->egaps++; | |
| 1450 } | |
| 1451 | |
| 1452 ba->igaps += 1; | |
| 1453 ba->egaps--; | |
| 1454 } | |
| 1455 | |
| 1456 } | |
| 1457 | |
| 1458 if(curr_x == 0 && curr_y == 0 && (curr_x == (prev_x - 1)) && (curr_y == (prev_y -1))){ | |
| 1459 rec_X[head_x--] = (char) X[curr_x]; | |
| 1460 rec_Y[head_y--] = (char) Y[curr_y]; | |
| 1461 ba->length++; | |
| 1462 } | |
| 1463 | |
| 1464 exit_point: | |
| 1465 | |
| 1466 //printf("curr: %"PRIu64", %"PRIu64"\n", curr_x, curr_y); | |
| 1467 //printf("Heads: %"PRIu64", %"PRIu64"\n", head_x, head_y); | |
| 1468 if(show == 1)fprintf(stdout, "%"PRIu64", %"PRIu64"\n", head_x, head_y); | |
| 1469 uint64_t huecos_x = 0, huecos_y = 0; | |
| 1470 k=(int64_t)curr_x-1; | |
| 1471 while(k>=0){ if(head_x == 0) break; rec_X[head_x--] = '-'; huecos_x++; k--; } | |
| 1472 k=(int64_t)curr_y-1; | |
| 1473 while(k>=0){ if(head_y == 0) break; rec_Y[head_y--] = '-'; huecos_y++; k--; } | |
| 1474 | |
| 1475 if(show == 1)fprintf(stdout, "%"PRIu64", %"PRIu64"\n", head_x, head_y); | |
| 1476 | |
| 1477 if(huecos_x >= huecos_y){ | |
| 1478 while(huecos_x > 0) { if(head_y == 0) break; rec_Y[head_y--] = ' '; huecos_x--;} | |
| 1479 }else{ | |
| 1480 while(huecos_y > 0) { if(head_x == 0) break; rec_X[head_x--] = ' '; huecos_y--;} | |
| 1481 } | |
| 1482 | |
| 1483 if(show == 1){ | |
| 1484 fprintf(stdout, "%"PRIu64", %"PRIu64"\n", head_x, head_y); | |
| 1485 fprintf(stdout, "%"PRIu64", %"PRIu64"\n", 2*Xend, 2*Yend); | |
| 1486 uint64_t k; | |
| 1487 for(k=head_x;k<limit_x;k++){ | |
| 1488 fprintf(stdout, "%c", (char) rec_X[k]); | |
| 1489 } | |
| 1490 printf("\n"); | |
| 1491 for(k=head_y;k<limit_y;k++){ | |
| 1492 fprintf(stdout, "%c", (char) rec_Y[k]); | |
| 1493 } | |
| 1494 printf("\n"); | |
| 1495 getchar(); | |
| 1496 } | |
| 1497 | |
| 1498 *ret_head_x = head_x; | |
| 1499 *ret_head_y = head_y; | |
| 1500 #ifdef VERBOSE | |
| 1501 printf("hx hy: %"PRIu64", %"PRIu64"\n", head_x, head_y); | |
| 1502 #endif | |
| 1503 } | |
| 1504 | |
| 1505 | |
| 1506 AVLTree * getNewLocationAVLTree(Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t key){ | |
| 1507 | |
| 1508 if(mp[*n_pools_used].current == POOL_SIZE){ | |
| 1509 *n_pools_used += 1; | |
| 1510 if(*n_pools_used == MAX_MEM_POOLS) terror("Reached max pools"); | |
| 1511 init_mem_pool_AVL(&mp[*n_pools_used]); | |
| 1512 | |
| 1513 } | |
| 1514 | |
| 1515 AVLTree * new_pos = mp[*n_pools_used].base + mp[*n_pools_used].current; | |
| 1516 mp[*n_pools_used].current++; | |
| 1517 | |
| 1518 new_pos->key = key; | |
| 1519 new_pos->count = 1; | |
| 1520 new_pos->height = 1; | |
| 1521 | |
| 1522 return new_pos; | |
| 1523 } | |
| 1524 | |
| 1525 void init_mem_pool_AVL(Mempool_AVL * mp){ | |
| 1526 mp->base = (AVLTree *) calloc(POOL_SIZE, sizeof(AVLTree)); | |
| 1527 if(mp->base == NULL) terror("Could not request memory pool"); | |
| 1528 mp->current = 0; | |
| 1529 } | |
| 1530 | |
| 1531 | |
| 1532 | |
| 1533 /* | |
| 1534 // An AVL tree node | |
| 1535 typedef struct AVL_Node{ | |
| 1536 uint64_t key; | |
| 1537 struct AVL_Node * left; | |
| 1538 struct AVL_Node * right; | |
| 1539 uint64_t height; | |
| 1540 llpos * next; | |
| 1541 } AVLTree; | |
| 1542 */ | |
| 1543 | |
| 1544 // A utility function to get height of the tree | |
| 1545 | |
| 1546 uint64_t height(AVLTree * N){ | |
| 1547 if (N == NULL) | |
| 1548 return 0; | |
| 1549 return N->height; | |
| 1550 } | |
| 1551 | |
| 1552 /* Substituted by (x == NULL) ? (0) : (x->height) */ | |
| 1553 | |
| 1554 /* Helper function that allocates a new node with the given key and | |
| 1555 NULL left and right pointers. */ | |
| 1556 | |
| 1557 /* This one is substituted by AVLTree * getNewLocationAVLTree(Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t key) */ | |
| 1558 | |
| 1559 // A utility function to right rotate subtree rooted with y | |
| 1560 // See the diagram given above. | |
| 1561 AVLTree * right_rotate(AVLTree * y){ | |
| 1562 AVLTree * x = y->left; | |
| 1563 AVLTree * T2 = x->right; | |
| 1564 | |
| 1565 // Perform rotation | |
| 1566 x->right = y; | |
| 1567 y->left = T2; | |
| 1568 | |
| 1569 // Update heights | |
| 1570 //x->height = MAX((x == NULL) ? (0) : (x->left->height), (x == NULL) ? (0) : (x->right->height))+1; | |
| 1571 //y->height = MAX((y == NULL) ? (0) : (y->left->height), (y == NULL) ? (0) : (y->right->height))+1; | |
| 1572 // Update heights | |
| 1573 y->height = MAX(height(y->left), height(y->right))+1; | |
| 1574 x->height = MAX(height(x->left), height(x->right))+1; | |
| 1575 | |
| 1576 // Return new root | |
| 1577 return x; | |
| 1578 } | |
| 1579 | |
| 1580 // A utility function to left rotate subtree rooted with x | |
| 1581 // See the diagram given above. | |
| 1582 AVLTree * left_rotate(AVLTree * x){ | |
| 1583 AVLTree * y = x->right; | |
| 1584 AVLTree * T2 = y->left; | |
| 1585 | |
| 1586 // Perform rotation | |
| 1587 y->left = x; | |
| 1588 x->right = T2; | |
| 1589 | |
| 1590 // Update heights | |
| 1591 //x->height = MAX((x == NULL) ? (0) : (x->left->height), (x == NULL) ? (0) : (x->right->height))+1; | |
| 1592 //y->height = MAX((y == NULL) ? (0) : (y->left->height), (y == NULL) ? (0) : (y->right->height))+1; | |
| 1593 x->height = MAX(height(x->left), height(x->right))+1; | |
| 1594 y->height = MAX(height(y->left), height(y->right))+1; | |
| 1595 | |
| 1596 // Return new root | |
| 1597 return y; | |
| 1598 } | |
| 1599 | |
| 1600 // Get Balance factor of node N | |
| 1601 | |
| 1602 int64_t get_balance(AVLTree * N){ | |
| 1603 if (N == NULL) | |
| 1604 return 0; | |
| 1605 return height(N->left) - height(N->right); | |
| 1606 } | |
| 1607 | |
| 1608 /* Substituted by (node == NULL) ? (0) : ((int64_t) node->left->height - (int64_t) node->right->height) */ | |
| 1609 | |
| 1610 AVLTree * find_AVLTree(AVLTree * node, uint64_t key){ | |
| 1611 AVLTree * found = NULL; | |
| 1612 if(node == NULL) return NULL; | |
| 1613 | |
| 1614 if (key < node->key) { | |
| 1615 found = find_AVLTree(node->left, key); | |
| 1616 } else if (key > node->key) { | |
| 1617 found = find_AVLTree(node->right, key); | |
| 1618 } else { | |
| 1619 return node; | |
| 1620 } | |
| 1621 return found; | |
| 1622 } | |
| 1623 | |
| 1624 llpos * find_AVLTree_llpos(AVLTree * node, uint64_t key){ | |
| 1625 llpos * aux = NULL; | |
| 1626 if(node == NULL) return NULL; | |
| 1627 | |
| 1628 if (key < node->key) { | |
| 1629 aux = find_AVLTree_llpos(node->left, key); | |
| 1630 } else if (key > node->key) { | |
| 1631 aux = find_AVLTree_llpos(node->right, key); | |
| 1632 } else { | |
| 1633 return node->next; | |
| 1634 } | |
| 1635 return aux; | |
| 1636 } | |
| 1637 | |
| 1638 // Recursive function to insert key in subtree rooted | |
| 1639 // with node and returns new root of subtree. | |
| 1640 AVLTree * insert_AVLTree(AVLTree * node, uint64_t key, Mempool_AVL * mp, uint64_t * n_pools_used, uint64_t pos, Mempool_l * mp_l, uint64_t * n_pools_used_l, uint64_t s_id){ | |
| 1641 /* 1. Perform the normal BST insertion */ | |
| 1642 if (node == NULL){ | |
| 1643 | |
| 1644 AVLTree * n_node = getNewLocationAVLTree(mp, n_pools_used, key); | |
| 1645 llpos * aux = getNewLocationllpos(mp_l, n_pools_used_l); | |
| 1646 aux->pos = pos; | |
| 1647 aux->s_id = s_id; | |
| 1648 n_node->next = aux; | |
| 1649 return n_node; | |
| 1650 } | |
| 1651 | |
| 1652 if (key < node->key) { | |
| 1653 node->left = insert_AVLTree(node->left, key, mp, n_pools_used, pos, mp_l, n_pools_used_l, s_id); | |
| 1654 } else if (key > node->key) { | |
| 1655 node->right = insert_AVLTree(node->right, key, mp, n_pools_used, pos, mp_l, n_pools_used_l, s_id); | |
| 1656 } else { | |
| 1657 // Equal keys are inserted as a linked list | |
| 1658 llpos * aux = getNewLocationllpos(mp_l, n_pools_used_l); | |
| 1659 aux->pos = pos; | |
| 1660 aux->s_id = s_id; | |
| 1661 aux->next = node->next; | |
| 1662 node->next = aux; | |
| 1663 ++(node->count); | |
| 1664 return node; | |
| 1665 } | |
| 1666 | |
| 1667 /* 2. Update height of this ancestor node */ | |
| 1668 //node->height = 1 + MAX((node->left == NULL) ? (0) : (node->left->height), (node->right == NULL) ? (0) : (node->right->height)); | |
| 1669 node->height = 1 + MAX(height(node->left), height(node->right)); | |
| 1670 | |
| 1671 /* 3. Get the balance factor of this ancestor | |
| 1672 node to check whether this node became | |
| 1673 unbalanced */ | |
| 1674 //int64_t balance = (node->left == NULL || node->right == NULL) ? (0) : ((int64_t) node->left->height - (int64_t) node->right->height); | |
| 1675 int64_t balance = get_balance(node); | |
| 1676 | |
| 1677 // If this node becomes unbalanced, then | |
| 1678 // there are 4 cases | |
| 1679 | |
| 1680 // Left Left Case | |
| 1681 if (balance > 1 && key < node->left->key) | |
| 1682 return right_rotate(node); | |
| 1683 | |
| 1684 // Right Right Case | |
| 1685 if (balance < -1 && key > node->right->key) | |
| 1686 return left_rotate(node); | |
| 1687 | |
| 1688 // Left Right Case | |
| 1689 if (balance > 1 && key > node->left->key) | |
| 1690 { | |
| 1691 node->left = left_rotate(node->left); | |
| 1692 return right_rotate(node); | |
| 1693 } | |
| 1694 | |
| 1695 // Right Left Case | |
| 1696 if (balance < -1 && key < node->right->key) | |
| 1697 { | |
| 1698 node->right = right_rotate(node->right); | |
| 1699 return left_rotate(node); | |
| 1700 } | |
| 1701 | |
| 1702 /* return the (unchanged) node pointer */ | |
| 1703 return node; | |
| 1704 } | |
| 1705 | |
| 1706 // A utility function to print preorder traversal | |
| 1707 // of the tree. | |
| 1708 // The function also prints height of every node | |
| 1709 | |
| 1710 void pre_order(AVLTree * root){ | |
| 1711 if(root != NULL){ | |
| 1712 printf("%"PRIu64" ", root->key); | |
| 1713 llpos * aux = root->next; | |
| 1714 while(aux != NULL){ printf("#%"PRIu64", ", aux->pos); aux = aux->next; } | |
| 1715 pre_order(root->left); | |
| 1716 pre_order(root->right); | |
| 1717 } | |
| 1718 } |
