Mercurial > repos > ashvark > qiime_1_8_0
comparison bwa-0.6.2/fastmap.c @ 2:a294fbfcb1db draft default tip
Uploaded BWA
author | ashvark |
---|---|
date | Fri, 18 Jul 2014 07:55:59 -0400 |
parents | dd1186b11b3b |
children |
comparison
equal
deleted
inserted
replaced
1:a9636dc1e99a | 2:a294fbfcb1db |
---|---|
1 #include <zlib.h> | |
2 #include <stdio.h> | |
3 #include <unistd.h> | |
4 #include <stdlib.h> | |
5 #include "bntseq.h" | |
6 #include "bwt.h" | |
7 #include "kvec.h" | |
8 #include "kseq.h" | |
9 KSEQ_INIT(gzFile, gzread) | |
10 | |
11 extern unsigned char nst_nt4_table[256]; | |
12 | |
13 typedef struct { | |
14 const bwt_t *bwt; | |
15 const uint8_t *query; | |
16 int start, len; | |
17 bwtintv_v *tmpvec[2], *matches; | |
18 } smem_i; | |
19 | |
20 smem_i *smem_iter_init(const bwt_t *bwt) | |
21 { | |
22 smem_i *iter; | |
23 iter = calloc(1, sizeof(smem_i)); | |
24 iter->bwt = bwt; | |
25 iter->tmpvec[0] = calloc(1, sizeof(bwtintv_v)); | |
26 iter->tmpvec[1] = calloc(1, sizeof(bwtintv_v)); | |
27 iter->matches = calloc(1, sizeof(bwtintv_v)); | |
28 return iter; | |
29 } | |
30 | |
31 void smem_iter_destroy(smem_i *iter) | |
32 { | |
33 free(iter->tmpvec[0]->a); | |
34 free(iter->tmpvec[1]->a); | |
35 free(iter->matches->a); | |
36 free(iter); | |
37 } | |
38 | |
39 void smem_set_query(smem_i *iter, int len, const uint8_t *query) | |
40 { | |
41 iter->query = query; | |
42 iter->start = 0; | |
43 iter->len = len; | |
44 } | |
45 | |
46 int smem_next(smem_i *iter) | |
47 { | |
48 iter->tmpvec[0]->n = iter->tmpvec[1]->n = iter->matches->n = 0; | |
49 if (iter->start >= iter->len || iter->start < 0) return -1; | |
50 while (iter->start < iter->len && iter->query[iter->start] > 3) ++iter->start; // skip ambiguous bases | |
51 if (iter->start == iter->len) return -1; | |
52 iter->start = bwt_smem1(iter->bwt, iter->len, iter->query, iter->start, iter->matches, iter->tmpvec); | |
53 return iter->start; | |
54 } | |
55 | |
56 int main_fastmap(int argc, char *argv[]) | |
57 { | |
58 int c, i, min_iwidth = 20, min_len = 17, print_seq = 0; | |
59 kseq_t *seq; | |
60 bwtint_t k; | |
61 gzFile fp; | |
62 bwt_t *bwt; | |
63 bntseq_t *bns; | |
64 smem_i *iter; | |
65 | |
66 while ((c = getopt(argc, argv, "w:l:s")) >= 0) { | |
67 switch (c) { | |
68 case 's': print_seq = 1; break; | |
69 case 'w': min_iwidth = atoi(optarg); break; | |
70 case 'l': min_len = atoi(optarg); break; | |
71 } | |
72 } | |
73 if (optind + 1 >= argc) { | |
74 fprintf(stderr, "Usage: bwa fastmap [-s] [-l minLen=%d] [-w maxSaSize=%d] <idxbase> <in.fq>\n", min_len, min_iwidth); | |
75 return 1; | |
76 } | |
77 | |
78 fp = gzopen(argv[optind + 1], "r"); | |
79 seq = kseq_init(fp); | |
80 { // load the packed sequences, BWT and SA | |
81 char *tmp = calloc(strlen(argv[optind]) + 5, 1); | |
82 strcat(strcpy(tmp, argv[optind]), ".bwt"); | |
83 bwt = bwt_restore_bwt(tmp); | |
84 strcat(strcpy(tmp, argv[optind]), ".sa"); | |
85 bwt_restore_sa(tmp, bwt); | |
86 free(tmp); | |
87 bns = bns_restore(argv[optind]); | |
88 } | |
89 iter = smem_iter_init(bwt); | |
90 while (kseq_read(seq) >= 0) { | |
91 printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l); | |
92 if (print_seq) { | |
93 putchar('\t'); | |
94 puts(seq->seq.s); | |
95 } else putchar('\n'); | |
96 for (i = 0; i < seq->seq.l; ++i) | |
97 seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]]; | |
98 smem_set_query(iter, seq->seq.l, (uint8_t*)seq->seq.s); | |
99 while (smem_next(iter) > 0) { | |
100 for (i = 0; i < iter->matches->n; ++i) { | |
101 bwtintv_t *p = &iter->matches->a[i]; | |
102 if ((uint32_t)p->info - (p->info>>32) < min_len) continue; | |
103 printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]); | |
104 if (p->x[2] <= min_iwidth) { | |
105 for (k = 0; k < p->x[2]; ++k) { | |
106 bwtint_t pos; | |
107 int len, is_rev, ref_id; | |
108 len = (uint32_t)p->info - (p->info>>32); | |
109 pos = bns_depos(bns, bwt_sa(bwt, p->x[0] + k), &is_rev); | |
110 if (is_rev) pos -= len - 1; | |
111 bns_cnt_ambi(bns, pos, len, &ref_id); | |
112 printf("\t%s:%c%ld", bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1); | |
113 } | |
114 } else fputs("\t*", stdout); | |
115 putchar('\n'); | |
116 } | |
117 } | |
118 puts("//"); | |
119 } | |
120 | |
121 smem_iter_destroy(iter); | |
122 bns_destroy(bns); | |
123 bwt_destroy(bwt); | |
124 kseq_destroy(seq); | |
125 gzclose(fp); | |
126 return 0; | |
127 } |