Mercurial > repos > ashvark > qiime_1_8_0
comparison bwa-0.6.2/simple_dp.c @ 0:dd1186b11b3b draft
Uploaded BWA
author | ashvark |
---|---|
date | Fri, 18 Jul 2014 07:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dd1186b11b3b |
---|---|
1 #include <stdlib.h> | |
2 #include <stdio.h> | |
3 #include <unistd.h> | |
4 #include <string.h> | |
5 #include <zlib.h> | |
6 #include <stdint.h> | |
7 #include "stdaln.h" | |
8 #include "utils.h" | |
9 | |
10 #include "kseq.h" | |
11 KSEQ_INIT(gzFile, gzread) | |
12 | |
13 typedef struct { | |
14 int l; | |
15 unsigned char *s; | |
16 char *n; | |
17 } seq1_t; | |
18 | |
19 typedef struct { | |
20 int n_seqs, m_seqs; | |
21 seq1_t *seqs; | |
22 } seqs_t; | |
23 | |
24 unsigned char aln_rev_table[256] = { | |
25 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', | |
26 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', | |
27 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', | |
28 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', | |
29 'N','T','V','G', 'H','N','N','C', 'D','N','N','M', 'N','K','N','N', | |
30 'N','N','Y','S', 'A','N','B','W', 'X','R','N','N', 'N','N','N','N', | |
31 'N','t','v','g', 'h','N','N','c', 'd','N','N','m', 'N','k','N','N', | |
32 'N','N','y','s', 'a','N','b','w', 'x','r','N','N', 'N','N','N','N', | |
33 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', | |
34 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', | |
35 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', | |
36 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', | |
37 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', | |
38 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', | |
39 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', | |
40 'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N' | |
41 }; | |
42 | |
43 static int g_is_global = 0, g_thres = 1, g_strand = 0, g_aa = 0; | |
44 static AlnParam g_aln_param; | |
45 | |
46 static void revseq(int len, uint8_t *seq) | |
47 { | |
48 int i; | |
49 for (i = 0; i < len>>1; ++i) { | |
50 uint8_t tmp = aln_rev_table[seq[len-1-i]]; | |
51 seq[len-1-i] = aln_rev_table[seq[i]]; | |
52 seq[i] = tmp; | |
53 } | |
54 if (len&1) seq[i] = aln_rev_table[seq[i]]; | |
55 } | |
56 | |
57 static seqs_t *load_seqs(const char *fn) | |
58 { | |
59 seqs_t *s; | |
60 seq1_t *p; | |
61 gzFile fp; | |
62 int l; | |
63 kseq_t *seq; | |
64 | |
65 fp = xzopen(fn, "r"); | |
66 seq = kseq_init(fp); | |
67 s = (seqs_t*)calloc(1, sizeof(seqs_t)); | |
68 s->m_seqs = 256; | |
69 s->seqs = (seq1_t*)calloc(s->m_seqs, sizeof(seq1_t)); | |
70 while ((l = kseq_read(seq)) >= 0) { | |
71 if (s->n_seqs == s->m_seqs) { | |
72 s->m_seqs <<= 1; | |
73 s->seqs = (seq1_t*)realloc(s->seqs, s->m_seqs * sizeof(seq1_t)); | |
74 } | |
75 p = s->seqs + (s->n_seqs++); | |
76 p->l = seq->seq.l; | |
77 p->s = (unsigned char*)malloc(p->l + 1); | |
78 memcpy(p->s, seq->seq.s, p->l); | |
79 p->s[p->l] = 0; | |
80 p->n = strdup((const char*)seq->name.s); | |
81 } | |
82 kseq_destroy(seq); | |
83 gzclose(fp); | |
84 fprintf(stderr, "[load_seqs] %d sequences are loaded.\n", s->n_seqs); | |
85 return s; | |
86 } | |
87 | |
88 static void aln_1seq(const seqs_t *ss, const char *name, int l, const char *s, char strand) | |
89 { | |
90 int i; | |
91 for (i = 0; i < ss->n_seqs; ++i) { | |
92 AlnAln *aa; | |
93 seq1_t *p = ss->seqs + i; | |
94 g_aln_param.band_width = l + p->l; | |
95 aa = aln_stdaln_aux(s, (const char*)p->s, &g_aln_param, g_is_global, g_thres, l, p->l); | |
96 if (aa->score >= g_thres || g_is_global) { | |
97 printf(">%s\t%d\t%d\t%s\t%c\t%d\t%d\t%d\t%d\t", p->n, aa->start1? aa->start1 : 1, aa->end1, name, strand, | |
98 aa->start2? aa->start2 : 1, aa->end2, aa->score, aa->subo); | |
99 // NB: I put the short sequence as the first sequence in SW, an insertion to | |
100 // the reference becomes a deletion from the short sequence. Therefore, I use | |
101 // "MDI" here rather than "MID", and print ->out2 first rather than ->out1. | |
102 for (i = 0; i != aa->n_cigar; ++i) | |
103 printf("%d%c", aa->cigar32[i]>>4, "MDI"[aa->cigar32[i]&0xf]); | |
104 printf("\n%s\n%s\n%s\n", aa->out2, aa->outm, aa->out1); | |
105 } | |
106 aln_free_AlnAln(aa); | |
107 } | |
108 } | |
109 | |
110 static void aln_seqs(const seqs_t *ss, const char *fn) | |
111 { | |
112 gzFile fp; | |
113 kseq_t *seq; | |
114 int l; | |
115 | |
116 fp = xzopen(fn, "r"); | |
117 seq = kseq_init(fp); | |
118 while ((l = kseq_read(seq)) >= 0) { | |
119 if (g_strand&1) aln_1seq(ss, (char*)seq->name.s, l, seq->seq.s, '+'); | |
120 if (g_strand&2) { | |
121 revseq(l, (uint8_t*)seq->seq.s); | |
122 aln_1seq(ss, (char*)seq->name.s, l, seq->seq.s, '-'); | |
123 } | |
124 } | |
125 kseq_destroy(seq); | |
126 gzclose(fp); | |
127 } | |
128 | |
129 int bwa_stdsw(int argc, char *argv[]) | |
130 { | |
131 int c; | |
132 seqs_t *ss; | |
133 | |
134 while ((c = getopt(argc, argv, "gT:frp")) >= 0) { | |
135 switch (c) { | |
136 case 'g': g_is_global = 1; break; | |
137 case 'T': g_thres = atoi(optarg); break; | |
138 case 'f': g_strand |= 1; break; | |
139 case 'r': g_strand |= 2; break; | |
140 case 'p': g_aa = 1; break; | |
141 } | |
142 } | |
143 if (g_strand == 0) g_strand = 3; | |
144 if (g_aa) g_strand = 1; | |
145 if (optind + 1 >= argc) { | |
146 fprintf(stderr, "\nUsage: bwa stdsw [options] <seq1.long.fa> <seq2.short.fa>\n\n"); | |
147 fprintf(stderr, "Options: -T INT minimum score [%d]\n", g_thres); | |
148 fprintf(stderr, " -p protein alignment (suppressing -r)\n"); | |
149 fprintf(stderr, " -f forward strand only\n"); | |
150 fprintf(stderr, " -r reverse strand only\n"); | |
151 fprintf(stderr, " -g global alignment\n\n"); | |
152 fprintf(stderr, "Note: This program is specifically designed for alignment between multiple short\n"); | |
153 fprintf(stderr, " sequences and ONE long sequence. It outputs the suboptimal score on the long\n"); | |
154 fprintf(stderr, " sequence.\n\n"); | |
155 return 1; | |
156 } | |
157 g_aln_param = g_aa? aln_param_aa2aa : aln_param_blast; | |
158 g_aln_param.gap_end = 0; | |
159 ss = load_seqs(argv[optind]); | |
160 aln_seqs(ss, argv[optind+1]); | |
161 return 0; | |
162 } |