annotate ezBAMQC/src/htslib/cram/sam_header.c @ 20:9de3bbec2479 draft default tip

Uploaded
author youngkim
date Thu, 31 Mar 2016 10:10:37 -0400
parents dfa3745e5fd8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2 Copyright (c) 2013 Genome Research Ltd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3 Author: James Bonfield <jkb@sanger.ac.uk>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
5 Redistribution and use in source and binary forms, with or without
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
6 modification, are permitted provided that the following conditions are met:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
7
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
8 1. Redistributions of source code must retain the above copyright notice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
9 this list of conditions and the following disclaimer.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
10
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
11 2. Redistributions in binary form must reproduce the above copyright notice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
12 this list of conditions and the following disclaimer in the documentation
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
13 and/or other materials provided with the distribution.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
14
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
16 Institute nor the names of its contributors may be used to endorse or promote
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
17 products derived from this software without specific prior written permission.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
18
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
29 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
30
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
31 #ifdef HAVE_CONFIG_H
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
32 #include "io_lib_config.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
33 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
34
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
35 #include <string.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
36 #include <assert.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
37
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
38 #include "cram/sam_header.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
39 #include "cram/string_alloc.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
40
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Reports a header parsing problem on stderr.
 *
 * msg  - description of the problem
 * line - start of the offending text; need not be NUL terminated
 * len  - number of bytes available at 'line'
 * lno  - line number to quote in the message
 *
 * Only the text up to (not including) the first newline, or 'len' bytes if
 * there is no newline, is echoed.
 */
static void sam_hdr_error(char *msg, char *line, int len, int lno) {
    int shown = 0;

    /* Measure the first line of the offending text. */
    while (shown < len && line[shown] != '\n')
        shown++;

    fprintf(stderr, "%s at line %d: \"%.*s\"\n", msg, lno, shown, line);
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
48
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
49 void sam_hdr_dump(SAM_hdr *hdr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
50 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
51 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
52
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
53 printf("===DUMP===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
54 for (k = kh_begin(hdr->h); k != kh_end(hdr->h); k++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
55 SAM_hdr_type *t1, *t2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
56 char c[2];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
57
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
58 if (!kh_exist(hdr->h, k))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
59 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
60
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
61 t1 = t2 = kh_val(hdr->h, k);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
62 c[0] = kh_key(hdr->h, k)>>8;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
63 c[1] = kh_key(hdr->h, k)&0xff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
64 printf("Type %.2s, count %d\n", c, t1->prev->order+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
65
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
66 do {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
67 SAM_hdr_tag *tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
68 printf(">>>%d ", t1->order);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
69 for (tag = t1->tag; tag; tag=tag->next) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
70 printf("\"%.2s\":\"%.*s\"\t",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
71 tag->str, tag->len-3, tag->str+3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
72 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
73 putchar('\n');
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
74 t1 = t1->next;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
75 } while (t1 != t2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
76 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
77
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
78 /* Dump out PG chains */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
79 printf("\n@PG chains:\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
80 for (i = 0; i < hdr->npg_end; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
81 int j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
82 printf(" %d:", i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
83 for (j = hdr->pg_end[i]; j != -1; j = hdr->pg[j].prev_id) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
84 printf("%s%d(%.*s)",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
85 j == hdr->pg_end[i] ? " " : "->",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
86 j, hdr->pg[j].name_len, hdr->pg[j].name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
87 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
88 printf("\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
89 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
90
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
91 puts("===END DUMP===");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
92 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
93
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
94 /* Updates the hash tables in the SAM_hdr structure.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
95 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
96 * Returns 0 on success;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
97 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
98 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
99 static int sam_hdr_update_hashes(SAM_hdr *sh,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
100 int type,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
101 SAM_hdr_type *h_type) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
102 /* Add to reference hash? */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
103 if ((type>>8) == 'S' && (type&0xff) == 'Q') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
104 SAM_hdr_tag *tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
105 int nref = sh->nref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
106
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
107 sh->ref = realloc(sh->ref, (sh->nref+1)*sizeof(*sh->ref));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
108 if (!sh->ref)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
109 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
110
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
111 tag = h_type->tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
112 sh->ref[nref].name = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
113 sh->ref[nref].len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
114 sh->ref[nref].ty = h_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
115 sh->ref[nref].tag = tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
116
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
117 while (tag) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
118 if (tag->str[0] == 'S' && tag->str[1] == 'N') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
119 if (!(sh->ref[nref].name = malloc(tag->len)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
120 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
121 strncpy(sh->ref[nref].name, tag->str+3, tag->len-3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
122 sh->ref[nref].name[tag->len-3] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
123 } else if (tag->str[0] == 'L' && tag->str[1] == 'N') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
124 sh->ref[nref].len = atoi(tag->str+3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
125 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
126 tag = tag->next;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
127 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
128
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
129 if (sh->ref[nref].name) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
130 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
131 int r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
132 k = kh_put(m_s2i, sh->ref_hash, sh->ref[nref].name, &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
133 if (-1 == r) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
134 kh_val(sh->ref_hash, k) = nref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
135 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
136
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
137 sh->nref++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
138 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
139
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
140 /* Add to read-group hash? */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
141 if ((type>>8) == 'R' && (type&0xff) == 'G') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
142 SAM_hdr_tag *tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
143 int nrg = sh->nrg;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
144
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
145 sh->rg = realloc(sh->rg, (sh->nrg+1)*sizeof(*sh->rg));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
146 if (!sh->rg)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
147 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
148
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
149 tag = h_type->tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
150 sh->rg[nrg].name = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
151 sh->rg[nrg].name_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
152 sh->rg[nrg].ty = h_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
153 sh->rg[nrg].tag = tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
154 sh->rg[nrg].id = nrg;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
155
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
156 while (tag) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
157 if (tag->str[0] == 'I' && tag->str[1] == 'D') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
158 if (!(sh->rg[nrg].name = malloc(tag->len)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
159 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
160 strncpy(sh->rg[nrg].name, tag->str+3, tag->len-3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
161 sh->rg[nrg].name[tag->len-3] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
162 sh->rg[nrg].name_len = strlen(sh->rg[nrg].name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
163 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
164 tag = tag->next;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
165 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
166
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
167 if (sh->rg[nrg].name) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
168 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
169 int r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
170 k = kh_put(m_s2i, sh->rg_hash, sh->rg[nrg].name, &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
171 if (-1 == r) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
172 kh_val(sh->rg_hash, k) = nrg;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
173 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
174
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
175 sh->nrg++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
176 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
177
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
178 /* Add to program hash? */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
179 if ((type>>8) == 'P' && (type&0xff) == 'G') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
180 SAM_hdr_tag *tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
181 int npg = sh->npg;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
182
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
183 sh->pg = realloc(sh->pg, (sh->npg+1)*sizeof(*sh->pg));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
184 if (!sh->pg)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
185 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
186
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
187 tag = h_type->tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
188 sh->pg[npg].name = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
189 sh->pg[npg].name_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
190 sh->pg[npg].ty = h_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
191 sh->pg[npg].tag = tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
192 sh->pg[npg].id = npg;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
193 sh->pg[npg].prev_id = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
194
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
195 while (tag) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
196 if (tag->str[0] == 'I' && tag->str[1] == 'D') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
197 if (!(sh->pg[npg].name = malloc(tag->len)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
198 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
199 strncpy(sh->pg[npg].name, tag->str+3, tag->len-3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
200 sh->pg[npg].name[tag->len-3] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
201 sh->pg[npg].name_len = strlen(sh->pg[npg].name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
202 } else if (tag->str[0] == 'P' && tag->str[1] == 'P') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
203 // Resolve later if needed
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
204 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
205 char tmp = tag->str[tag->len]; tag->str[tag->len] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
206 k = kh_get(m_s2i, sh->pg_hash, tag->str+3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
207 tag->str[tag->len] = tmp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
208
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
209 if (k != kh_end(sh->pg_hash)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
210 int p_id = kh_val(sh->pg_hash, k);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
211 sh->pg[npg].prev_id = sh->pg[p_id].id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
212
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
213 /* Unmark previous entry as a PG termination */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
214 if (sh->npg_end > 0 &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
215 sh->pg_end[sh->npg_end-1] == p_id) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
216 sh->npg_end--;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
217 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
218 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
219 for (i = 0; i < sh->npg_end; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
220 if (sh->pg_end[i] == p_id) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
221 memmove(&sh->pg_end[i], &sh->pg_end[i+1],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
222 (sh->npg_end-i-1)*sizeof(*sh->pg_end));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
223 sh->npg_end--;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
224 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
225 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
226 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
227 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
228 sh->pg[npg].prev_id = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
229 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
230 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
231 tag = tag->next;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
232 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
233
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
234 if (sh->pg[npg].name) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
235 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
236 int r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
237 k = kh_put(m_s2i, sh->pg_hash, sh->pg[npg].name, &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
238 if (-1 == r) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
239 kh_val(sh->pg_hash, k) = npg;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
240 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
241
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
242 /* Add to npg_end[] array. Remove later if we find a PP line */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
243 if (sh->npg_end >= sh->npg_end_alloc) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
244 sh->npg_end_alloc = sh->npg_end_alloc
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
245 ? sh->npg_end_alloc*2
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
246 : 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
247 sh->pg_end = realloc(sh->pg_end,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
248 sh->npg_end_alloc * sizeof(int));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
249 if (!sh->pg_end)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
250 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
251 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
252 sh->pg_end[sh->npg_end++] = npg;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
253
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
254 sh->npg++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
255 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
256
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
257 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
258 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
259
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
260 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
261 * Appends a formatted line to an existing SAM header.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
262 * Line is a full SAM header record, eg "@SQ\tSN:foo\tLN:100", with
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
263 * optional new-line. If it contains more than 1 line then multiple lines
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
264 * will be added in order.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
265 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
266 * Len is the length of the text data, or 0 if unknown (in which case
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
267 * it should be null terminated).
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
268 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
269 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
270 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
271 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
272 int sam_hdr_add_lines(SAM_hdr *sh, const char *lines, int len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
273 int i, lno = 1, text_offset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
274 char *hdr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
275
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
276 if (!len)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
277 len = strlen(lines);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
278
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
279 text_offset = ks_len(&sh->text);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
280 if (EOF == kputsn(lines, len, &sh->text))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
281 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
282 hdr = ks_str(&sh->text) + text_offset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
283
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
284 for (i = 0; i < len; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
285 khint32_t type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
286 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
287
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
288 int l_start = i, new;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
289 SAM_hdr_type *h_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
290 SAM_hdr_tag *h_tag, *last;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
291
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
292 if (hdr[i] != '@') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
293 int j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
294 for (j = i; j < len && hdr[j] != '\n'; j++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
295 ;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
296 sam_hdr_error("Header line does not start with '@'",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
297 &hdr[l_start], len - l_start, lno);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
298 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
299 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
300
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
301 type = (hdr[i+1]<<8) | hdr[i+2];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
302 if (hdr[i+1] < 'A' || hdr[i+1] > 'z' ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
303 hdr[i+2] < 'A' || hdr[i+2] > 'z') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
304 sam_hdr_error("Header line does not have a two character key",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
305 &hdr[l_start], len - l_start, lno);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
306 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
307 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
308
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
309 i += 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
310 if (hdr[i] == '\n')
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
311 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
312
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
313 // Add the header line type
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
314 if (!(h_type = pool_alloc(sh->type_pool)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
315 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
316 if (-1 == (k = kh_put(sam_hdr, sh->h, type, &new)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
317 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
318
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
319 // Form the ring, either with self or other lines of this type
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
320 if (!new) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
321 SAM_hdr_type *t = kh_val(sh->h, k), *p;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
322 p = t->prev;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
323
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
324 assert(p->next = t);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
325 p->next = h_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
326 h_type->prev = p;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
327
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
328 t->prev = h_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
329 h_type->next = t;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
330 h_type->order = p->order+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
331 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
332 kh_val(sh->h, k) = h_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
333 h_type->prev = h_type->next = h_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
334 h_type->order = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
335 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
336
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
337 // Parse the tags on this line
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
338 last = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
339 if ((type>>8) == 'C' && (type&0xff) == 'O') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
340 int j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
341 if (hdr[i] != '\t') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
342 sam_hdr_error("Missing tab",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
343 &hdr[l_start], len - l_start, lno);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
344 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
345 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
346
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
347 for (j = ++i; j < len && hdr[j] != '\n'; j++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
348 ;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
349
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
350 if (!(h_type->tag = h_tag = pool_alloc(sh->tag_pool)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
351 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
352 h_tag->str = string_ndup(sh->str_pool, &hdr[i], j-i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
353 h_tag->len = j-i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
354 h_tag->next = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
355 if (!h_tag->str)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
356 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
357
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
358 i = j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
359
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
360 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
361 do {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
362 int j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
363 if (hdr[i] != '\t') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
364 sam_hdr_error("Missing tab",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
365 &hdr[l_start], len - l_start, lno);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
366 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
367 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
368
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
369 for (j = ++i; j < len && hdr[j] != '\n' && hdr[j] != '\t'; j++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
370 ;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
371
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
372 if (!(h_tag = pool_alloc(sh->tag_pool)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
373 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
374 h_tag->str = string_ndup(sh->str_pool, &hdr[i], j-i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
375 h_tag->len = j-i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
376 h_tag->next = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
377 if (!h_tag->str)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
378 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
379
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
380 if (h_tag->len < 3 || h_tag->str[2] != ':') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
381 sam_hdr_error("Malformed key:value pair",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
382 &hdr[l_start], len - l_start, lno);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
383 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
384 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
385
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
386 if (last)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
387 last->next = h_tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
388 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
389 h_type->tag = h_tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
390
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
391 last = h_tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
392 i = j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
393 } while (i < len && hdr[i] != '\n');
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
394 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
395
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
396 /* Update RG/SQ hashes */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
397 if (-1 == sam_hdr_update_hashes(sh, type, h_type))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
398 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
399 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
400
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
401 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
402 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
403
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
404 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
405 * Adds a single line to a SAM header.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
406 * Specify type and one or more key,value pairs, ending with the NULL key.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
407 * Eg. sam_hdr_add(h, "SQ", "ID", "foo", "LN", "100", NULL).
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
408 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
409 * Returns index for specific entry on success (eg 2nd SQ, 4th RG)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
410 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
411 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
412 int sam_hdr_add(SAM_hdr *sh, const char *type, ...) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
413 va_list args;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
414 va_start(args, type);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
415 return sam_hdr_vadd(sh, type, args, NULL);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
416 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
417
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
418 int sam_hdr_vadd(SAM_hdr *sh, const char *type, va_list ap, ...) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
419 va_list args;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
420 SAM_hdr_type *h_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
421 SAM_hdr_tag *h_tag, *last;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
422 int new;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
423 khint32_t type_i = (type[0]<<8) | type[1], k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
424
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
425 #if defined(HAVE_VA_COPY)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
426 va_list ap_local;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
427 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
428
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
429 if (EOF == kputc_('@', &sh->text))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
430 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
431 if (EOF == kputsn(type, 2, &sh->text))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
432 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
433
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
434 if (!(h_type = pool_alloc(sh->type_pool)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
435 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
436 if (-1 == (k = kh_put(sam_hdr, sh->h, type_i, &new)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
437 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
438 kh_val(sh->h, k) = h_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
439
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
440 // Form the ring, either with self or other lines of this type
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
441 if (!new) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
442 SAM_hdr_type *t = kh_val(sh->h, k), *p;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
443 p = t->prev;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
444
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
445 assert(p->next = t);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
446 p->next = h_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
447 h_type->prev = p;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
448
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
449 t->prev = h_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
450 h_type->next = t;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
451 h_type->order = p->order + 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
452 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
453 h_type->prev = h_type->next = h_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
454 h_type->order = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
455 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
456
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
457 last = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
458
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
459 // Any ... varargs
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
460 va_start(args, ap);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
461 for (;;) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
462 char *k, *v;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
463 int idx;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
464
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
465 if (!(k = (char *)va_arg(args, char *)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
466 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
467 v = va_arg(args, char *);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
468
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
469 if (EOF == kputc_('\t', &sh->text))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
470 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
471
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
472 if (!(h_tag = pool_alloc(sh->tag_pool)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
473 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
474 idx = ks_len(&sh->text);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
475
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
476 if (EOF == kputs(k, &sh->text))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
477 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
478 if (EOF == kputc_(':', &sh->text))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
479 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
480 if (EOF == kputs(v, &sh->text))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
481 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
482
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
483 h_tag->len = ks_len(&sh->text) - idx;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
484 h_tag->str = string_ndup(sh->str_pool,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
485 ks_str(&sh->text) + idx,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
486 h_tag->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
487 h_tag->next = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
488 if (!h_tag->str)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
489 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
490
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
491 if (last)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
492 last->next = h_tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
493 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
494 h_type->tag = h_tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
495
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
496 last = h_tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
497 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
498 va_end(args);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
499
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
500 #if defined(HAVE_VA_COPY)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
501 va_copy(ap_local, ap);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
502 # define ap ap_local
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
503 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
504
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
505 // Plus the specified va_list params
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
506 for (;;) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
507 char *k, *v;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
508 int idx;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
509
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
510 if (!(k = (char *)va_arg(ap, char *)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
511 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
512 v = va_arg(ap, char *);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
513
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
514 if (EOF == kputc_('\t', &sh->text))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
515 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
516
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
517 if (!(h_tag = pool_alloc(sh->tag_pool)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
518 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
519 idx = ks_len(&sh->text);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
520
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
521 if (EOF == kputs(k, &sh->text))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
522 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
523 if (EOF == kputc_(':', &sh->text))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
524 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
525 if (EOF == kputs(v, &sh->text))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
526 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
527
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
528 h_tag->len = ks_len(&sh->text) - idx;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
529 h_tag->str = string_ndup(sh->str_pool,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
530 ks_str(&sh->text) + idx,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
531 h_tag->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
532 h_tag->next = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
533 if (!h_tag->str)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
534 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
535
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
536 if (last)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
537 last->next = h_tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
538 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
539 h_type->tag = h_tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
540
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
541 last = h_tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
542 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
543 va_end(ap);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
544
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
545 if (EOF == kputc('\n', &sh->text))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
546 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
547
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
548 int itype = (type[0]<<8) | type[1];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
549 if (-1 == sam_hdr_update_hashes(sh, itype, h_type))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
550 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
551
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
552 return h_type->order;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
553 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
554
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
555 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
556 * Returns the first header item matching 'type'. If ID is non-NULL it checks
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
557 * for the tag ID: and compares against the specified ID.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
558 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
559 * Returns NULL if no type/ID is found
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
560 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
561 SAM_hdr_type *sam_hdr_find(SAM_hdr *hdr, char *type,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
562 char *ID_key, char *ID_value) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
563 SAM_hdr_type *t1, *t2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
564 int itype = (type[0]<<8)|(type[1]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
565 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
566
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
567 /* Special case for types we have prebuilt hashes on */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
568 if (ID_key) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
569 if (type[0] == 'S' && type[1] == 'Q' &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
570 ID_key[0] == 'S' && ID_key[1] == 'N') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
571 k = kh_get(m_s2i, hdr->ref_hash, ID_value);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
572 return k != kh_end(hdr->ref_hash)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
573 ? hdr->ref[kh_val(hdr->ref_hash, k)].ty
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
574 : NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
575 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
576
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
577 if (type[0] == 'R' && type[1] == 'G' &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
578 ID_key[0] == 'I' && ID_key[1] == 'D') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
579 k = kh_get(m_s2i, hdr->rg_hash, ID_value);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
580 return k != kh_end(hdr->rg_hash)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
581 ? hdr->rg[kh_val(hdr->rg_hash, k)].ty
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
582 : NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
583 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
584
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
585 if (type[0] == 'P' && type[1] == 'G' &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
586 ID_key[0] == 'I' && ID_key[1] == 'D') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
587 k = kh_get(m_s2i, hdr->pg_hash, ID_value);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
588 return k != kh_end(hdr->pg_hash)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
589 ? hdr->pg[kh_val(hdr->pg_hash, k)].ty
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
590 : NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
591 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
592 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
593
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
594 k = kh_get(sam_hdr, hdr->h, itype);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
595 if (k == kh_end(hdr->h))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
596 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
597
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
598 if (!ID_key)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
599 return kh_val(hdr->h, k);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
600
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
601 t1 = t2 = kh_val(hdr->h, k);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
602 do {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
603 SAM_hdr_tag *tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
604 for (tag = t1->tag; tag; tag = tag->next) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
605 if (tag->str[0] == ID_key[0] && tag->str[1] == ID_key[1]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
606 char *cp1 = tag->str+3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
607 char *cp2 = ID_value;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
608 while (*cp1 && *cp1 == *cp2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
609 cp1++, cp2++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
610 if (*cp2 || *cp1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
611 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
612 return t1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
613 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
614 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
615 t1 = t1->next;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
616 } while (t1 != t2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
617
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
618 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
619 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
620
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
621 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
622 * As per SAM_hdr_type, but returns a complete line of formatted text
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
623 * for a specific head type/ID combination. If ID is NULL then it returns
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
624 * the first line of the specified type.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
625 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
626 * The returned string is malloced and should be freed by the calling
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
627 * function with free().
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
628 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
629 * Returns NULL if no type/ID is found.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
630 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
631 char *sam_hdr_find_line(SAM_hdr *hdr, char *type,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
632 char *ID_key, char *ID_value) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
633 SAM_hdr_type *ty = sam_hdr_find(hdr, type, ID_key, ID_value);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
634 kstring_t ks = KS_INITIALIZER;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
635 SAM_hdr_tag *tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
636 int r = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
637
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
638 if (!ty)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
639 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
640
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
641 // Paste together the line from the hashed copy
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
642 r |= (kputc_('@', &ks) == EOF);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
643 r |= (kputs(type, &ks) == EOF);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
644 for (tag = ty->tag; tag; tag = tag->next) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
645 r |= (kputc_('\t', &ks) == EOF);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
646 r |= (kputsn(tag->str, tag->len, &ks) == EOF);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
647 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
648
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
649 if (r) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
650 KS_FREE(&ks);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
651 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
652 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
653
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
654 return ks_str(&ks);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
655 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
656
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
657
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
658 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
659 * Looks for a specific key in a single sam header line.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
660 * If prev is non-NULL it also fills this out with the previous tag, to
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
661 * permit use in key removal. *prev is set to NULL when the tag is the first
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
662 * key in the list. When a tag isn't found, prev (if non NULL) will be the last
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
663 * tag in the existing list.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
664 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
665 * Returns the tag pointer on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
666 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
667 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
668 SAM_hdr_tag *sam_hdr_find_key(SAM_hdr *sh,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
669 SAM_hdr_type *type,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
670 char *key,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
671 SAM_hdr_tag **prev) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
672 SAM_hdr_tag *tag, *p = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
673
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
674 for (tag = type->tag; tag; p = tag, tag = tag->next) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
675 if (tag->str[0] == key[0] && tag->str[1] == key[1]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
676 if (prev)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
677 *prev = p;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
678 return tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
679 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
680 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
681
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
682 if (prev)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
683 *prev = p;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
684
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
685 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
686 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
687
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
688
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
689 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
690 * Adds or updates tag key,value pairs in a header line.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
691 * Eg for adding M5 tags to @SQ lines or updating sort order for the
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
692 * @HD line (although use the sam_hdr_sort_order() function for
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
693 * HD manipulation, which is a wrapper around this funuction).
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
694 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
695 * Specify multiple key,value pairs ending in NULL.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
696 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
697 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
698 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
699 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
700 int sam_hdr_update(SAM_hdr *hdr, SAM_hdr_type *type, ...) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
701 va_list ap;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
702
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
703 va_start(ap, type);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
704
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
705 for (;;) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
706 char *k, *v;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
707 int idx;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
708 SAM_hdr_tag *tag, *prev;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
709
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
710 if (!(k = (char *)va_arg(ap, char *)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
711 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
712 v = va_arg(ap, char *);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
713
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
714 tag = sam_hdr_find_key(hdr, type, k, &prev);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
715 if (!tag) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
716 if (!(tag = pool_alloc(hdr->tag_pool)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
717 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
718 if (prev)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
719 prev->next = tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
720 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
721 type->tag = tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
722
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
723 tag->next = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
724 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
725
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
726 idx = ks_len(&hdr->text);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
727 if (ksprintf(&hdr->text, "%2.2s:%s", k, v) < 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
728 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
729 tag->len = ks_len(&hdr->text) - idx;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
730 tag->str = string_ndup(hdr->str_pool,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
731 ks_str(&hdr->text) + idx,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
732 tag->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
733 if (!tag->str)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
734 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
735 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
736
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
737 va_end(ap);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
738
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
739 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
740 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
741
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
742 #define K(a) (((a)[0]<<8)|((a)[1]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
743
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
744 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
745 * Reconstructs the kstring from the header hash table.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
746 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
747 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
748 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
749 int sam_hdr_rebuild(SAM_hdr *hdr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
750 /* Order: HD then others */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
751 kstring_t ks = KS_INITIALIZER;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
752 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
753
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
754
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
755 k = kh_get(sam_hdr, hdr->h, K("HD"));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
756 if (k != kh_end(hdr->h)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
757 SAM_hdr_type *ty = kh_val(hdr->h, k);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
758 SAM_hdr_tag *tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
759 if (EOF == kputs("@HD", &ks))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
760 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
761 for (tag = ty->tag; tag; tag = tag->next) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
762 if (EOF == kputc_('\t', &ks))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
763 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
764 if (EOF == kputsn_(tag->str, tag->len, &ks))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
765 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
766 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
767 if (EOF == kputc('\n', &ks))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
768 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
769 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
770
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
771 for (k = kh_begin(hdr->h); k != kh_end(hdr->h); k++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
772 SAM_hdr_type *t1, *t2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
773
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
774 if (!kh_exist(hdr->h, k))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
775 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
776
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
777 if (kh_key(hdr->h, k) == K("HD"))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
778 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
779
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
780 t1 = t2 = kh_val(hdr->h, k);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
781 do {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
782 SAM_hdr_tag *tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
783 char c[2];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
784
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
785 if (EOF == kputc_('@', &ks))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
786 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
787 c[0] = kh_key(hdr->h, k)>>8;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
788 c[1] = kh_key(hdr->h, k)&0xff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
789 if (EOF == kputsn_(c, 2, &ks))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
790 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
791 for (tag = t1->tag; tag; tag=tag->next) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
792 if (EOF == kputc_('\t', &ks))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
793 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
794 if (EOF == kputsn_(tag->str, tag->len, &ks))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
795 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
796 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
797 if (EOF == kputc('\n', &ks))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
798 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
799 t1 = t1->next;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
800 } while (t1 != t2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
801 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
802
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
803 if (ks_str(&hdr->text))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
804 KS_FREE(&hdr->text);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
805
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
806 hdr->text = ks;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
807
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
808 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
809 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
810
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
811
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
812 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
813 * Creates an empty SAM header, ready to be populated.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
814 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
815 * Returns a SAM_hdr struct on success (free with sam_hdr_free())
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
816 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
817 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
818 SAM_hdr *sam_hdr_new() {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
819 SAM_hdr *sh = calloc(1, sizeof(*sh));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
820
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
821 if (!sh)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
822 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
823
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
824 sh->h = kh_init(sam_hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
825 if (!sh->h)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
826 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
827
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
828 sh->ID_cnt = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
829 sh->ref_count = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
830
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
831 sh->nref = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
832 sh->ref = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
833 if (!(sh->ref_hash = kh_init(m_s2i)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
834 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
835
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
836 sh->nrg = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
837 sh->rg = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
838 if (!(sh->rg_hash = kh_init(m_s2i)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
839 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
840
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
841 sh->npg = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
842 sh->pg = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
843 sh->npg_end = sh->npg_end_alloc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
844 sh->pg_end = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
845 if (!(sh->pg_hash = kh_init(m_s2i)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
846 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
847
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
848 KS_INIT(&sh->text);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
849
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
850 if (!(sh->tag_pool = pool_create(sizeof(SAM_hdr_tag))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
851 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
852
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
853 if (!(sh->type_pool = pool_create(sizeof(SAM_hdr_type))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
854 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
855
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
856 if (!(sh->str_pool = string_pool_create(8192)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
857 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
858
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
859 return sh;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
860
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
861 err:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
862 if (sh->h)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
863 kh_destroy(sam_hdr, sh->h);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
864
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
865 if (sh->tag_pool)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
866 pool_destroy(sh->tag_pool);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
867
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
868 if (sh->type_pool)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
869 pool_destroy(sh->type_pool);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
870
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
871 if (sh->str_pool)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
872 string_pool_destroy(sh->str_pool);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
873
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
874 free(sh);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
875
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
876 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
877 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
878
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
879
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
880 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
881 * Tokenises a SAM header into a hash table.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
882 * Also extracts a few bits on specific data types, such as @RG lines.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
883 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
884 * Returns a SAM_hdr struct on success (free with sam_hdr_free())
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
885 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
886 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
887 SAM_hdr *sam_hdr_parse_(const char *hdr, int len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
888 /* Make an empty SAM_hdr */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
889 SAM_hdr *sh;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
890
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
891 sh = sam_hdr_new();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
892 if (NULL == sh) return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
893
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
894 if (NULL == hdr) return sh; // empty header is permitted
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
895
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
896 /* Parse the header, line by line */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
897 if (-1 == sam_hdr_add_lines(sh, hdr, len)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
898 sam_hdr_free(sh);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
899 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
900 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
901
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
902 //sam_hdr_dump(sh);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
903 //sam_hdr_add(sh, "RG", "ID", "foo", "SM", "bar", NULL);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
904 //sam_hdr_rebuild(sh);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
905 //printf(">>%s<<", ks_str(sh->text));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
906
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
907 //parse_references(sh);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
908 //parse_read_groups(sh);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
909
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
910 sam_hdr_link_pg(sh);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
911 //sam_hdr_dump(sh);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
912
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
913 return sh;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
914 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
915
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
916 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
917 * Produces a duplicate copy of hdr and returns it.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
918 * Returns NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
919 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
920 SAM_hdr *sam_hdr_dup(SAM_hdr *hdr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
921 if (-1 == sam_hdr_rebuild(hdr))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
922 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
923
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
924 return sam_hdr_parse_(sam_hdr_str(hdr), sam_hdr_length(hdr));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
925 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
926
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
927 /*! Increments a reference count on hdr.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
928 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
929 * This permits multiple files to share the same header, all calling
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
930 * sam_hdr_free when done, without causing errors for other open files.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
931 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
932 void sam_hdr_incr_ref(SAM_hdr *hdr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
933 hdr->ref_count++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
934 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
935
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
936 /*! Increments a reference count on hdr.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
937 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
938 * This permits multiple files to share the same header, all calling
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
939 * sam_hdr_free when done, without causing errors for other open files.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
940 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
941 * If the reference count hits zero then the header is automatically
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
942 * freed. This makes it a synonym for sam_hdr_free().
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
943 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
944 void sam_hdr_decr_ref(SAM_hdr *hdr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
945 sam_hdr_free(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
946 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
947
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
948 /*! Deallocates all storage used by a SAM_hdr struct.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
949 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
950 * This also decrements the header reference count. If after decrementing
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
951 * it is still non-zero then the header is assumed to be in use by another
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
952 * caller and the free is not done.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
953 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
954 * This is a synonym for sam_hdr_dec_ref().
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
955 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
956 void sam_hdr_free(SAM_hdr *hdr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
957 if (!hdr)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
958 return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
959
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
960 if (--hdr->ref_count > 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
961 return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
962
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
963 if (ks_str(&hdr->text))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
964 KS_FREE(&hdr->text);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
965
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
966 if (hdr->h)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
967 kh_destroy(sam_hdr, hdr->h);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
968
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
969 if (hdr->ref_hash)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
970 kh_destroy(m_s2i, hdr->ref_hash);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
971
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
972 if (hdr->ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
973 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
974 for (i = 0; i < hdr->nref; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
975 if (hdr->ref[i].name)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
976 free(hdr->ref[i].name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
977 free(hdr->ref);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
978 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
979
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
980 if (hdr->rg_hash)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
981 kh_destroy(m_s2i, hdr->rg_hash);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
982
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
983 if (hdr->rg) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
984 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
985 for (i = 0; i < hdr->nrg; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
986 if (hdr->rg[i].name)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
987 free(hdr->rg[i].name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
988 free(hdr->rg);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
989 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
990
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
991 if (hdr->pg_hash)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
992 kh_destroy(m_s2i, hdr->pg_hash);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
993
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
994 if (hdr->pg) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
995 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
996 for (i = 0; i < hdr->npg; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
997 if (hdr->pg[i].name)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
998 free(hdr->pg[i].name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
999 free(hdr->pg);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1000 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1001
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1002 if (hdr->pg_end)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1003 free(hdr->pg_end);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1004
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1005 if (hdr->type_pool)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1006 pool_destroy(hdr->type_pool);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1007
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1008 if (hdr->tag_pool)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1009 pool_destroy(hdr->tag_pool);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1010
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1011 if (hdr->str_pool)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1012 string_pool_destroy(hdr->str_pool);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1013
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1014 free(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1015 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1016
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1017 int sam_hdr_length(SAM_hdr *hdr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1018 return ks_len(&hdr->text);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1019 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1020
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1021 char *sam_hdr_str(SAM_hdr *hdr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1022 return ks_str(&hdr->text);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1023 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1024
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1025 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1026 * Looks up a reference sequence by name and returns the numerical ID.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1027 * Returns -1 if unknown reference.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1028 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1029 int sam_hdr_name2ref(SAM_hdr *hdr, const char *ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1030 khint_t k = kh_get(m_s2i, hdr->ref_hash, ref);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1031 return k == kh_end(hdr->ref_hash) ? -1 : kh_val(hdr->ref_hash, k);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1032 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1033
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1034 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1035 * Looks up a read-group by name and returns a pointer to the start of the
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1036 * associated tag list.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1037 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1038 * Returns NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1039 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1040 SAM_RG *sam_hdr_find_rg(SAM_hdr *hdr, const char *rg) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1041 khint_t k = kh_get(m_s2i, hdr->rg_hash, rg);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1042 return k == kh_end(hdr->rg_hash)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1043 ? NULL
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1044 : &hdr->rg[kh_val(hdr->rg_hash, k)];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1045 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1046
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1047
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1048 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1049 * Fixes any PP links in @PG headers.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1050 * If the entries are in order then this doesn't need doing, but incase
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1051 * our header is out of order this goes through the sh->pg[] array
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1052 * setting the prev_id field.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1053 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1054 * Note we can have multiple complete chains. This code should identify the
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1055 * tails of these chains as these are the entries we have to link to in
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1056 * subsequent PP records.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1057 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1058 * Returns 0 on sucess
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1059 * -1 on failure (indicating broken PG/PP records)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1060 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1061 int sam_hdr_link_pg(SAM_hdr *hdr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1062 int i, j, ret = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1063
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1064 hdr->npg_end_alloc = hdr->npg;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1065 hdr->pg_end = realloc(hdr->pg_end, hdr->npg * sizeof(*hdr->pg_end));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1066 if (!hdr->pg_end)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1067 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1068
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1069 for (i = 0; i < hdr->npg; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1070 hdr->pg_end[i] = i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1071
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1072 for (i = 0; i < hdr->npg; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1073 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1074 SAM_hdr_tag *tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1075 char tmp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1076
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1077 for (tag = hdr->pg[i].tag; tag; tag = tag->next) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1078 if (tag->str[0] == 'P' && tag->str[1] == 'P')
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1079 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1080 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1081 if (!tag) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1082 /* Chain start points */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1083 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1084 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1085
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1086 tmp = tag->str[tag->len]; tag->str[tag->len] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1087 k = kh_get(m_s2i, hdr->pg_hash, tag->str+3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1088 tag->str[tag->len] = tmp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1089
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1090 if (k == kh_end(hdr->pg_hash)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1091 ret = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1092 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1093 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1094
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1095 hdr->pg[i].prev_id = hdr->pg[kh_val(hdr->pg_hash, k)].id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1096 hdr->pg_end[kh_val(hdr->pg_hash, k)] = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1097 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1098
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1099 for (i = j = 0; i < hdr->npg; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1100 if (hdr->pg_end[i] != -1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1101 hdr->pg_end[j++] = hdr->pg_end[i];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1102 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1103 hdr->npg_end = j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1104
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1105 return ret;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1106 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1107
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1108 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1109 * Returns a unique ID from a base name.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1110 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1111 * The value returned is valid until the next call to
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1112 * this function.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1113 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1114 const char *sam_hdr_PG_ID(SAM_hdr *sh, const char *name) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1115 khint_t k = kh_get(m_s2i, sh->pg_hash, name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1116 if (k == kh_end(sh->pg_hash))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1117 return name;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1118
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1119 do {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1120 sprintf(sh->ID_buf, "%.1000s.%d", name, sh->ID_cnt++);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1121 k = kh_get(m_s2i, sh->pg_hash, sh->ID_buf);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1122 } while (k == kh_end(sh->pg_hash));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1123
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1124 return sh->ID_buf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1125 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1126
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1127 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1128 * Add an @PG line.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1129 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1130 * If we wish complete control over this use sam_hdr_add() directly. This
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1131 * function uses that, but attempts to do a lot of tedious house work for
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1132 * you too.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1133 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1134 * - It will generate a suitable ID if the supplied one clashes.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1135 * - It will generate multiple @PG records if we have multiple PG chains.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1136 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1137 * Call it as per sam_hdr_add() with a series of key,value pairs ending
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1138 * in NULL.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1139 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1140 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1141 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1142 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1143 int sam_hdr_add_PG(SAM_hdr *sh, const char *name, ...) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1144 va_list args;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1145 va_start(args, name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1146
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1147 if (sh->npg_end) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1148 /* Copy ends array to avoid us looping while modifying it */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1149 int *end = malloc(sh->npg_end * sizeof(int));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1150 int i, nends = sh->npg_end;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1151
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1152 if (!end)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1153 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1154
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1155 memcpy(end, sh->pg_end, nends * sizeof(*end));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1156
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1157 for (i = 0; i < nends; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1158 if (-1 == sam_hdr_vadd(sh, "PG", args,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1159 "ID", sam_hdr_PG_ID(sh, name),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1160 "PN", name,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1161 "PP", sh->pg[end[i]].name,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1162 NULL)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1163 free(end);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1164 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1165 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1166 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1167
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1168 free(end);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1169 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1170 if (-1 == sam_hdr_vadd(sh, "PG", args,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1171 "ID", sam_hdr_PG_ID(sh, name),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1172 "PN", name,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1173 NULL))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1174 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1175 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1176
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1177 //sam_hdr_dump(sh);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1178
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1179 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1180 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1181
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * A function to help with construction of CL tags in @PG records.
 *
 * Joins the argc strings in argv into one newly allocated string.  Every
 * argument is followed by a single space (so a non-empty result ends in a
 * space) and any tab inside an argument is replaced by a space.
 *
 * The caller owns the result and should free() it.
 *
 * Returns malloced char * on success
 *         NULL on failure
 */
char *stringify_argv(int argc, char *argv[]) {
    size_t total = 1;           /* room for the terminating NUL */
    char *joined, *out;
    int arg;

    /* Size: each argument plus the space that follows it */
    for (arg = 0; arg < argc; arg++)
        total += strlen(argv[arg]) + 1;

    joined = malloc(total);
    if (joined == NULL)
        return NULL;

    /* Fill */
    out = joined;
    for (arg = 0; arg < argc; arg++) {
        const char *in;

        for (in = argv[arg]; *in != '\0'; in++)
            *out++ = (*in == '\t') ? ' ' : *in;

        *out++ = ' ';
    }
    *out = '\0';

    return joined;
}