Mercurial > repos > dawe > srf2fastq
comparison srf2fastq/io_lib-1.12.2/progs/srf_info.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d901c9f41a6a |
---|---|
1 /* | |
2 * ====================================================================== | |
3 * This software has been created by Genome Research Limited (GRL). | |
4 * | |
5 * GRL hereby grants permission to use, copy, modify and distribute | |
6 * this software and its documentation for non-commercial purposes | |
7 * without fee at the user's own risk on the basis set out below. | |
8 * | |
9 * GRL neither undertakes nor accepts any duty whether contractual or | |
10 * otherwise in connection with the software, its use or the use of | |
11 * any derivative, and makes no representations or warranties, express | |
12 * or implied, concerning the software, its suitability, fitness for | |
13 * a particular purpose or non-infringement. | |
14 * | |
15 * In no event shall the authors of the software or GRL be responsible | |
16 * or liable for any loss or damage whatsoever arising in any way | |
17 * directly or indirectly out of the use of this software or its | |
18 * derivatives, even if advised of the possibility of such damage. | |
19 * | |
20 * Our software can be freely distributed under the conditions set out | |
21 * above, and must contain this copyright notice. | |
22 * ====================================================================== | |
23 */ | |
24 | |
25 /* | |
26 * This performs a linear (non-indexed) search for a trace in an SRF archive. | |
27 * | |
28 * It's not intended as a suitable production program or as a library of code | |
29 * to use, but as a test and benchmark statistic. | |
30 */ | |
31 | |
32 #include <string.h> | |
33 #include <stdio.h> | |
34 #include <unistd.h> | |
35 #include <ctype.h> | |
36 #include <sys/stat.h> | |
37 #include <sys/types.h> | |
38 #include <io_lib/Read.h> | |
39 #include <io_lib/misc.h> | |
40 #include <io_lib/ztr.h> | |
41 #include <io_lib/srf.h> | |
42 #include <io_lib/hash_table.h> | |
43 | |
44 #define LEVEL_READ (1 << 0) | |
45 #define LEVEL_CHUNK (1 << 1) | |
46 #define LEVEL_NAME (1 << 2) | |
47 #define LEVEL_BASE (1 << 3) | |
48 #define LEVEL_ALL (LEVEL_READ | LEVEL_CHUNK | LEVEL_NAME | LEVEL_BASE ); | |
49 | |
50 /* only checks the first 10 traces */ | |
51 #define LEVEL_CHECK 255 | |
52 | |
53 #define READ_GOOD 0 | |
54 #define READ_BAD 1 | |
55 #define READ_TOTAL 2 | |
56 | |
57 #define NREADS 3 | |
58 | |
59 #define READ_GOOD_STR "GOOD" | |
60 #define READ_BAD_STR "BAD" | |
61 #define READ_TOTAL_STR "TOTAL" | |
62 | |
63 /* see ztr.h for a list of all possible ztr chunk types */ | |
64 | |
65 #define CHUNK_BASE 0 | |
66 #define CHUNK_CNF1 1 | |
67 #define CHUNK_CNF4 2 | |
68 #define CHUNK_SAMP 3 | |
69 #define CHUNK_SMP4 4 | |
70 #define CHUNK_REGN 5 | |
71 | |
72 #define NCHUNKS 6 | |
73 | |
74 #define CHUNK_BASE_TYPE ZTR_TYPE_BASE | |
75 #define CHUNK_CNF1_TYPE ZTR_TYPE_CNF1 | |
76 #define CHUNK_CNF4_TYPE ZTR_TYPE_CNF4 | |
77 #define CHUNK_SAMP_TYPE ZTR_TYPE_SAMP | |
78 #define CHUNK_SMP4_TYPE ZTR_TYPE_SMP4 | |
79 #define CHUNK_REGN_TYPE ZTR_TYPE_REGN | |
80 | |
81 #define KEY_TYPE 0 | |
82 #define KEY_VALTYPE 1 | |
83 #define KEY_GROUP 2 | |
84 #define KEY_OFFS 3 | |
85 #define KEY_SCALE 4 | |
86 #define KEY_COORD 5 | |
87 #define KEY_NAME 6 | |
88 | |
89 #define NKEYS 7 | |
90 | |
91 #define KEY_TYPE_STR "TYPE" | |
92 #define KEY_VALTYPE_STR "VALTYPE" | |
93 #define KEY_GROUP_STR "GROUP" | |
94 #define KEY_OFFS_STR "OFFS" | |
95 #define KEY_SCALE_STR "SCALE" | |
96 #define KEY_COORD_STR "COORD" | |
97 #define KEY_NAME_STR "NAME" | |
98 | |
99 #define TYPE_PROC 0 | |
100 #define TYPE_SLXI 1 | |
101 #define TYPE_SLXN 2 | |
102 #define TYPE_0FAM 3 | |
103 #define TYPE_1CY3 4 | |
104 #define TYPE_2TXR 5 | |
105 #define TYPE_3CY5 6 | |
106 | |
107 #define NTYPES 7 | |
108 | |
109 #define TYPE_PROC_STR "PROC" | |
110 #define TYPE_SLXI_STR "SLXI" | |
111 #define TYPE_SLXN_STR "SLXN" | |
112 #define TYPE_0FAM_STR "0FAM" | |
113 #define TYPE_1CY3_STR "1CY3" | |
114 #define TYPE_2TXR_STR "2TXR" | |
115 #define TYPE_3CY5_STR "3CY5" | |
116 | |
117 #define MAX_REGIONS 4 | |
118 | |
119 /* regn chunk */ | |
120 typedef struct { | |
121 char coord; | |
122 char *region_names; | |
123 int nregions; | |
124 char *name[MAX_REGIONS]; | |
125 char code[MAX_REGIONS]; | |
126 int start[MAX_REGIONS]; | |
127 int length[MAX_REGIONS]; | |
128 int count; | |
129 } regn_t; | |
130 | |
131 /* ------------------------------------------------------------------------ */ | |
132 /* | |
133 * Print usage message to stderr and exit with the given \"code\". | |
134 */ | |
135 void usage(int code) { | |
136 fprintf(stderr, "Usage: srf_info [-level level_bitmap] input(s)\n"); | |
137 fprintf(stderr, "Options:\n"); | |
138 fprintf(stderr, " -l level_bitmap \n"); | |
139 fprintf(stderr, " 1\tCount of good/bad reads.\n"); | |
140 fprintf(stderr, " 2\tCounts for selected chunk types.\n"); | |
141 fprintf(stderr, " 4\tTrace count and trace name prefix for each trace_header.\n"); | |
142 fprintf(stderr, " 8\tBase count.\n"); | |
143 fprintf(stderr, "\n"); | |
144 | |
145 exit(code); | |
146 } | |
147 | |
148 /* | |
149 * Parse the REGN chunk, add to regn HASH | |
150 * | |
151 * Returns corresponding HashItem * from regn Hash | |
152 */ | |
153 HashItem *parse_regn(ztr_t *z, ztr_chunk_t *chunk, HashTable *regn_hash) { | |
154 char key[1024]; | |
155 char *name; | |
156 HashItem *hi; | |
157 regn_t *regn; | |
158 size_t l; | |
159 | |
160 uncompress_chunk(z, chunk); | |
161 | |
162 /* the hash key is a combination of the region names and boundaries */ | |
163 name = ztr_lookup_mdata_value(z, chunk, "NAME"); | |
164 l = snprintf(key, sizeof(key), "names=%s", name); | |
165 if( chunk->dlength ){ | |
166 int nbndy = (chunk->dlength-1)/4; | |
167 uint4 *bndy = (uint4 *)(chunk->data+1); | |
168 int ibndy; | |
169 for (ibndy=0; ibndy<nbndy; ibndy++) { | |
170 if( ibndy ) | |
171 l += snprintf(key + l, sizeof(key) - l, | |
172 ";%d", be_int4(bndy[ibndy])); | |
173 else | |
174 l += snprintf(key + l, sizeof(key) - l, | |
175 " boundaries=%d", be_int4(bndy[ibndy])); | |
176 } | |
177 } | |
178 | |
179 if (NULL == (hi = (HashTableSearch(regn_hash, key, strlen(key))))) { | |
180 int iregion, nregions = 0; | |
181 char *coord; | |
182 char *cp1; | |
183 uint4 bndy[MAX_REGIONS]; | |
184 int ibndy, nbndy = 0; | |
185 HashData hd; | |
186 | |
187 if( NULL == (regn = (regn_t *)malloc(sizeof(regn_t)))) { | |
188 return NULL; | |
189 } | |
190 | |
191 coord = ztr_lookup_mdata_value(z, chunk, "COORD"); | |
192 regn->coord = (NULL == coord ? 'B' : *coord ); | |
193 | |
194 regn->region_names = strdup(name); | |
195 | |
196 cp1 = strtok (regn->region_names,";"); | |
197 while(cp1) { | |
198 char *cp2; | |
199 if(NULL == (cp2 = strchr(cp1,':'))) { | |
200 fprintf(stderr, "Invalid region name/code pair %s\n", cp1); | |
201 return NULL; | |
202 } | |
203 *cp2++ = '\0'; | |
204 regn->name[nregions] = cp1; | |
205 regn->code[nregions] = *cp2; | |
206 nregions++; | |
207 cp1 = strtok (NULL, ";"); | |
208 } | |
209 | |
210 regn->nregions = nregions; | |
211 | |
212 if( chunk->dlength ) { | |
213 nbndy = (chunk->dlength-1)/4; | |
214 memcpy(bndy, chunk->data+1, chunk->dlength-1); | |
215 } | |
216 | |
217 for( iregion=0, ibndy=0; iregion<nregions; iregion++) { | |
218 /* start = (start + length of previous region) or 0 if no previous region */ | |
219 /* length = (next boundary - start of region) or -1 if no next boundary */ | |
220 if( regn->code[iregion] == 'E' ){ | |
221 /* no sequence, length = 0 */ | |
222 regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0); | |
223 regn->length[iregion] = 0; | |
224 }else{ | |
225 if( ibndy > nbndy ){ | |
226 fprintf(stderr, "More name/code pairs than boundaries\n"); | |
227 return NULL; | |
228 } | |
229 regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0); | |
230 regn->length[iregion] = (ibndy == nbndy ? -1 : (be_int4(bndy[ibndy])-regn->start[iregion])); | |
231 ibndy++; | |
232 } | |
233 } | |
234 | |
235 regn->count = 1; | |
236 | |
237 hd.p = regn; | |
238 if (NULL == (hi = HashTableAdd(regn_hash, key, strlen(key), hd, NULL))) { | |
239 free(regn->region_names); | |
240 free(regn); | |
241 return NULL; | |
242 } | |
243 } else { | |
244 regn = (regn_t *)(hi->data.p); | |
245 regn->count++; | |
246 } | |
247 | |
248 return hi; | |
249 } | |
250 | |
251 /* | |
252 * Parse the sequence | |
253 * | |
254 * Returns 0 on success. | |
255 */ | |
256 int parse_base(ztr_t *z, ztr_chunk_t *chunk, uint64_t *base_count) { | |
257 int i; | |
258 | |
259 uncompress_chunk(z, chunk); | |
260 | |
261 for (i = 1; i < chunk->dlength; i++) { | |
262 char base = chunk->data[i]; | |
263 uint1 key; | |
264 switch (base) { | |
265 case 'A': case 'a': | |
266 key = 0; | |
267 break; | |
268 case 'C': case 'c': | |
269 key = 1; | |
270 break; | |
271 case 'G': case 'g': | |
272 key = 2; | |
273 break; | |
274 case 'T': case 't': | |
275 key = 3; | |
276 break; | |
277 default: | |
278 key = 4; | |
279 break; | |
280 } | |
281 base_count[key]++; | |
282 } | |
283 | |
284 return 0; | |
285 } | |
286 | |
287 /* | |
288 * count the mdata keys | |
289 * | |
290 * Returns 0 on success. | |
291 */ | |
292 int count_mdata_keys(ztr_t *z, ztr_chunk_t *chunk, int ichunk, long key_count[NCHUNKS][NKEYS], long type_count[NCHUNKS][NTYPES]) { | |
293 char *keys_str[] = {KEY_TYPE_STR, KEY_VALTYPE_STR, KEY_GROUP_STR, KEY_OFFS_STR, KEY_SCALE_STR, KEY_COORD_STR, KEY_NAME_STR}; | |
294 char *types_str[] = {TYPE_PROC_STR, TYPE_SLXI_STR, TYPE_SLXN_STR, TYPE_0FAM_STR, TYPE_1CY3_STR, TYPE_2TXR_STR, TYPE_3CY5_STR}; | |
295 int ikey, itype; | |
296 | |
297 if (z->header.version_major > 1 || | |
298 z->header.version_minor >= 2) { | |
299 /* ZTR format 1.2 onwards */ | |
300 | |
301 char *cp = chunk->mdata; | |
302 int32_t dlen = chunk->mdlength; | |
303 | |
304 /* | |
305 * NB: we may wish to rewrite this using a dedicated state machine | |
306 * instead of strlen/strcmp as this currently assumes the meta- | |
307 * data is correctly formatted, which we cannot assume as the | |
308 * metadata is external and outside of our control. | |
309 * Passing in non-nul terminated strings could crash this code. | |
310 */ | |
311 while (dlen > 0) { | |
312 size_t l; | |
313 | |
314 /* key */ | |
315 l = strlen(cp); | |
316 for (ikey=0; ikey<NKEYS; ikey++) | |
317 if(0 == strcmp(cp, keys_str[ikey])) | |
318 break; | |
319 | |
320 cp += l+1; | |
321 dlen -= l+1; | |
322 | |
323 /* value */ | |
324 if (ikey < NKEYS) | |
325 key_count[ichunk][ikey]++; | |
326 | |
327 /* for the type key check the value */ | |
328 if (ikey == KEY_TYPE && (ichunk == CHUNK_SAMP || ichunk == CHUNK_SMP4)) { | |
329 for (itype=0; itype<NTYPES; itype++) | |
330 if(0 == strcmp(cp, types_str[itype])) | |
331 break; | |
332 if(itype < NTYPES) | |
333 type_count[ichunk][itype]++; | |
334 } | |
335 | |
336 l = strlen(cp); | |
337 cp += l+1; | |
338 dlen -= l+1; | |
339 } | |
340 | |
341 } else { | |
342 /* v1.1 and before only supported a few types, specifically coded | |
343 * per chunk type. | |
344 */ | |
345 | |
346 switch (chunk->type) { | |
347 case ZTR_TYPE_SAMP: | |
348 case ZTR_TYPE_SMP4: | |
349 key_count[ichunk][KEY_TYPE]++; | |
350 for (itype=0; itype<NTYPES; itype++) | |
351 if(0 == strcmp(chunk->mdata, types_str[itype])) { | |
352 type_count[ichunk][itype]++; | |
353 } | |
354 break; | |
355 | |
356 default: | |
357 break; | |
358 } | |
359 } | |
360 | |
361 return 0; | |
362 } | |
363 | |
364 /* | |
365 * As per partial_decode_ztr in srf.c, but without uncompress_ztr. | |
366 */ | |
367 ztr_t *partial_decode_ztr2(srf_t *srf, mFILE *mf, ztr_t *z) { | |
368 ztr_t *ztr; | |
369 ztr_chunk_t *chunk; | |
370 long pos = 0; | |
371 | |
372 if (z) { | |
373 /* Use existing ZTR object => already loaded header */ | |
374 ztr = z; | |
375 | |
376 } else { | |
377 /* Allocate or use existing ztr */ | |
378 if (NULL == (ztr = new_ztr())) | |
379 return NULL; | |
380 | |
381 /* Read the header */ | |
382 if (-1 == ztr_read_header(mf, &ztr->header)) { | |
383 if (!z) | |
384 delete_ztr(ztr); | |
385 mrewind(mf); | |
386 return NULL; | |
387 } | |
388 | |
389 /* Check magic number and version */ | |
390 if (memcmp(ztr->header.magic, ZTR_MAGIC, 8) != 0) { | |
391 if (!z) | |
392 delete_ztr(ztr); | |
393 mrewind(mf); | |
394 return NULL; | |
395 } | |
396 | |
397 if (ztr->header.version_major != ZTR_VERSION_MAJOR) { | |
398 if (!z) | |
399 delete_ztr(ztr); | |
400 mrewind(mf); | |
401 return NULL; | |
402 } | |
403 } | |
404 | |
405 /* Load chunks */ | |
406 pos = mftell(mf); | |
407 while (chunk = ztr_read_chunk_hdr(mf)) { | |
408 chunk->data = (char *)xmalloc(chunk->dlength); | |
409 if (chunk->dlength != mfread(chunk->data, 1, chunk->dlength, mf)) | |
410 break; | |
411 | |
412 ztr->nchunks++; | |
413 ztr->chunk = (ztr_chunk_t *)xrealloc(ztr->chunk, ztr->nchunks * | |
414 sizeof(ztr_chunk_t)); | |
415 memcpy(&ztr->chunk[ztr->nchunks-1], chunk, sizeof(*chunk)); | |
416 xfree(chunk); | |
417 pos = mftell(mf); | |
418 } | |
419 | |
420 /* | |
421 * At this stage we're 'pos' into the mFILE mf with any remainder being | |
422 * a partial block. | |
423 */ | |
424 if (0 == ztr->nchunks) { | |
425 if (!z) | |
426 delete_ztr(ztr); | |
427 mrewind(mf); | |
428 return NULL; | |
429 } | |
430 | |
431 /* Ensure we exit at the start of a ztr CHUNK */ | |
432 mfseek(mf, pos, SEEK_SET); | |
433 | |
434 /* If this is the header part, ensure we uncompress and init. data */ | |
435 if (!z) { | |
436 /* Force caching of huffman code_sets */ | |
437 ztr_find_hcode(ztr, CODE_USER); | |
438 | |
439 /* And uncompress the rest */ | |
440 uncompress_ztr(ztr); | |
441 } | |
442 | |
443 return ztr; | |
444 } | |
445 | |
446 /* | |
447 * Given the archive name and the level_mode | |
448 * generate information about the archive | |
449 * | |
450 * Note the generated srf file is NOT indexed | |
451 * | |
452 * Returns 0 on success. | |
453 */ | |
454 int srf_info(char *input, int level_mode, long *read_count, long *chunk_count, | |
455 uint64_t *chunk_size, long key_count[NCHUNKS][NKEYS], | |
456 long type_count[NCHUNKS][NTYPES], HashTable *regn_hash, | |
457 uint64_t *base_count) { | |
458 srf_t *srf; | |
459 off_t pos; | |
460 int type; | |
461 int count = 0; | |
462 long trace_body_count = 0; | |
463 char name[1024]; | |
464 | |
465 if (NULL == (srf = srf_open(input, "rb"))) { | |
466 perror(input); | |
467 return 1; | |
468 } | |
469 | |
470 while ((type = srf_next_block_type(srf)) >= 0) { | |
471 | |
472 switch (type) { | |
473 case SRFB_CONTAINER: | |
474 if( trace_body_count ){ | |
475 if( level_mode & LEVEL_NAME ) | |
476 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); | |
477 trace_body_count = 0; | |
478 } | |
479 if (0 != srf_read_cont_hdr(srf, &srf->ch)) { | |
480 fprintf(stderr, "Error reading container header.\nExiting.\n"); | |
481 exit(1); | |
482 } | |
483 break; | |
484 | |
485 case SRFB_XML: | |
486 if( trace_body_count ){ | |
487 if( level_mode & LEVEL_NAME ) | |
488 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); | |
489 trace_body_count = 0; | |
490 } | |
491 if (0 != srf_read_xml(srf, &srf->xml)) { | |
492 fprintf(stderr, "Error reading XML.\nExiting.\n"); | |
493 exit(1); | |
494 } | |
495 break; | |
496 | |
497 case SRFB_TRACE_HEADER: | |
498 if( trace_body_count ){ | |
499 if( level_mode & LEVEL_NAME ) | |
500 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); | |
501 trace_body_count = 0; | |
502 } | |
503 if (0 != srf_read_trace_hdr(srf, &srf->th)) { | |
504 fprintf(stderr, "Error reading trace header.\nExiting.\n"); | |
505 exit(1); | |
506 } | |
507 | |
508 if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) ) | |
509 break; | |
510 | |
511 /* Decode ZTR chunks in the header */ | |
512 if (srf->mf) | |
513 mfdestroy(srf->mf); | |
514 | |
515 srf->mf = mfcreate(NULL, 0); | |
516 if (srf->th.trace_hdr_size) | |
517 mfwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, srf->mf); | |
518 if (srf->ztr) | |
519 delete_ztr(srf->ztr); | |
520 mrewind(srf->mf); | |
521 | |
522 if (NULL != (srf->ztr = partial_decode_ztr(srf, srf->mf, NULL))) { | |
523 srf->mf_pos = mftell(srf->mf); | |
524 } else { | |
525 /* Maybe not enough to decode or no headerBlob. */ | |
526 /* So delay until decoding the body. */ | |
527 srf->mf_pos = 0; | |
528 } | |
529 mfseek(srf->mf, 0, SEEK_END); | |
530 srf->mf_end = mftell(srf->mf); | |
531 | |
532 break; | |
533 | |
534 case SRFB_TRACE_BODY: { | |
535 srf_trace_body_t old_tb; | |
536 ztr_t *ztr_tmp; | |
537 int no_trace = (level_mode & (LEVEL_CHUNK | LEVEL_BASE) ? 0 : 1); | |
538 | |
539 if (0 != srf_read_trace_body(srf, &old_tb, no_trace)) { | |
540 fprintf(stderr, "Error reading trace body.\nExiting.\n"); | |
541 exit(1); | |
542 } | |
543 | |
544 if (-1 == construct_trace_name(srf->th.id_prefix, | |
545 (unsigned char *)old_tb.read_id, | |
546 old_tb.read_id_length, | |
547 name, 512)) { | |
548 fprintf(stderr, "Error constructing trace name.\nExiting.\n"); | |
549 exit(1); | |
550 } | |
551 | |
552 trace_body_count++; | |
553 if( 1 == trace_body_count ){ | |
554 if( level_mode & LEVEL_NAME ) | |
555 printf( "trace_name: %s + %s", srf->th.id_prefix, name+strlen(srf->th.id_prefix)); | |
556 } | |
557 | |
558 read_count[READ_TOTAL]++; | |
559 | |
560 if (old_tb.flags & SRF_READ_FLAG_BAD_MASK ){ | |
561 read_count[READ_BAD]++; | |
562 } else { | |
563 read_count[READ_GOOD]++; | |
564 } | |
565 | |
566 if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) ) | |
567 break; | |
568 | |
569 if (!srf->mf) { | |
570 fprintf(stderr, "Error reading trace body.\nExiting.\n"); | |
571 exit(1); | |
572 } | |
573 | |
574 mfseek(srf->mf, srf->mf_end, SEEK_SET); | |
575 if (old_tb.trace_size) { | |
576 mfwrite(old_tb.trace, 1, old_tb.trace_size, srf->mf); | |
577 free(old_tb.trace); | |
578 old_tb.trace = NULL; | |
579 } | |
580 | |
581 mftruncate(srf->mf, mftell(srf->mf)); | |
582 mfseek(srf->mf, srf->mf_pos, SEEK_SET); | |
583 | |
584 if (srf->ztr) | |
585 ztr_tmp = ztr_dup(srf->ztr); /* inefficient, but simple */ | |
586 else | |
587 ztr_tmp = NULL; | |
588 | |
589 if ((ztr_tmp = partial_decode_ztr(srf, srf->mf, ztr_tmp))) { | |
590 int i; | |
591 for (i=0; i<ztr_tmp->nchunks; i++) { | |
592 int ichunk = -1; | |
593 | |
594 switch (ztr_tmp->chunk[i].type) { | |
595 case ZTR_TYPE_BASE: | |
596 ichunk = CHUNK_BASE; | |
597 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; | |
598 if( parse_base(ztr_tmp, &ztr_tmp->chunk[i], base_count) ){ | |
599 delete_ztr(ztr_tmp); | |
600 return 1; | |
601 } | |
602 break; | |
603 case ZTR_TYPE_CNF1: | |
604 ichunk = CHUNK_CNF1; | |
605 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; | |
606 break; | |
607 case ZTR_TYPE_CNF4: | |
608 ichunk = CHUNK_CNF4; | |
609 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; | |
610 break; | |
611 case ZTR_TYPE_SAMP: | |
612 ichunk = CHUNK_SAMP; | |
613 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; | |
614 break; | |
615 case ZTR_TYPE_SMP4: | |
616 ichunk = CHUNK_SMP4; | |
617 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; | |
618 break; | |
619 case ZTR_TYPE_REGN: | |
620 ichunk = CHUNK_REGN; | |
621 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength; | |
622 if( NULL == parse_regn(ztr_tmp, &ztr_tmp->chunk[i], regn_hash) ){ | |
623 delete_ztr(ztr_tmp); | |
624 return 1; | |
625 } | |
626 break; | |
627 default: | |
628 break; | |
629 } | |
630 | |
631 if( ichunk > -1 ) { | |
632 chunk_count[ichunk]++; | |
633 count_mdata_keys(ztr_tmp, &ztr_tmp->chunk[i], ichunk, key_count, type_count); | |
634 } | |
635 } | |
636 | |
637 } | |
638 | |
639 if( ztr_tmp ) | |
640 delete_ztr(ztr_tmp); | |
641 | |
642 count++; | |
643 if( (level_mode == LEVEL_CHECK) && (count == 10) ){ | |
644 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); | |
645 srf_destroy(srf, 1); | |
646 return 0; | |
647 } | |
648 | |
649 break; | |
650 } | |
651 | |
652 case SRFB_INDEX: { | |
653 off_t pos = ftell(srf->fp); | |
654 if( trace_body_count ){ | |
655 if( level_mode & LEVEL_NAME ) | |
656 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); | |
657 trace_body_count = 0; | |
658 } | |
659 printf( "Reading srf index block\n"); | |
660 if (0 != srf_read_index_hdr(srf, &srf->hdr, 1)) { | |
661 srf_destroy(srf, 1); | |
662 fprintf(stderr, "Error reading srf index block header.\nExiting.\n"); | |
663 exit(1); | |
664 } | |
665 /* Skip the index body */ | |
666 fseeko(srf->fp, pos + srf->hdr.size, SEEK_SET); | |
667 | |
668 break; | |
669 } | |
670 | |
671 case SRFB_NULL_INDEX: { | |
672 uint64_t ilen; | |
673 if( trace_body_count ){ | |
674 if( level_mode & LEVEL_NAME ) | |
675 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); | |
676 trace_body_count = 0; | |
677 } | |
678 printf( "Reading srf null index block\n"); | |
679 /* | |
680 * Maybe the last 8 bytes of a the file (or previously was | |
681 * last 8 bytes prior to concatenating SRF files together). | |
682 * If so it's the index length and should always be 8 zeros. | |
683 */ | |
684 if (1 != fread(&ilen, 8, 1, srf->fp)) { | |
685 srf_destroy(srf, 1); | |
686 fprintf(stderr, "Error reading srf null index block.\nExiting.\n"); | |
687 exit(1); | |
688 } | |
689 if (ilen != 0) { | |
690 srf_destroy(srf, 1); | |
691 fprintf(stderr, "Invalid srf null index block.\nExiting.\n"); | |
692 exit(1); | |
693 } | |
694 | |
695 break; | |
696 } | |
697 | |
698 default: | |
699 srf_destroy(srf, 1); | |
700 fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type); | |
701 exit(1); | |
702 } | |
703 | |
704 } | |
705 | |
706 if( trace_body_count ){ | |
707 if( level_mode & LEVEL_NAME ) | |
708 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count); | |
709 trace_body_count = 0; | |
710 } | |
711 | |
712 /* the type should be -1 (EOF) */ | |
713 if( type != -1 ) { | |
714 fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type); | |
715 exit(1); | |
716 } | |
717 | |
718 /* are we really at the end of the srf file */ | |
719 pos = ftell(srf->fp); | |
720 fseek(srf->fp, 0, SEEK_END); | |
721 if( pos != ftell(srf->fp) ){ | |
722 fprintf(stderr, "srf file is corrupt\n"); | |
723 exit(1); | |
724 } | |
725 | |
726 srf_destroy(srf, 1); | |
727 return 0; | |
728 } | |
729 | |
730 /* ------------------------------------------------------------------------ */ | |
731 | |
732 /* | |
733 * Main method. | |
734 */ | |
735 int main(int argc, char **argv) { | |
736 int ifile, nfiles; | |
737 char *input = NULL; | |
738 | |
739 int c; | |
740 int errflg = 0; | |
741 extern char *optarg; | |
742 extern int optind, optopt; | |
743 | |
744 int level_mode = LEVEL_ALL; | |
745 | |
746 long read_count[NREADS]; | |
747 char *read_str[] = {READ_GOOD_STR, READ_BAD_STR, READ_TOTAL_STR}; | |
748 long chunk_count[NCHUNKS]; | |
749 uint64_t chunk_size[NCHUNKS]; | |
750 uint4 chunk_type[] = {CHUNK_BASE_TYPE, CHUNK_CNF1_TYPE, CHUNK_CNF4_TYPE, CHUNK_SAMP_TYPE, CHUNK_SMP4_TYPE, CHUNK_REGN_TYPE}; | |
751 long key_count[NCHUNKS][NKEYS]; | |
752 char *keys_str[] = {KEY_TYPE_STR, KEY_VALTYPE_STR, KEY_GROUP_STR, KEY_OFFS_STR, KEY_SCALE_STR, KEY_COORD_STR, KEY_NAME_STR}; | |
753 long type_count[NCHUNKS][NTYPES]; | |
754 char *types_str[] = {TYPE_PROC_STR, TYPE_SLXI_STR, TYPE_SLXN_STR, TYPE_0FAM_STR, TYPE_1CY3_STR, TYPE_2TXR_STR, TYPE_3CY5_STR}; | |
755 int iread, ichunk, ikey, itype; | |
756 | |
757 while ((c = getopt(argc, argv, "l:")) != -1) { | |
758 switch (c) { | |
759 case 'l': | |
760 if (1 != sscanf(optarg, "%d", &level_mode)) { | |
761 fprintf(stderr, | |
762 "Otion -%c requires an operand\n", optopt); | |
763 errflg++; | |
764 } | |
765 break; | |
766 case ':': /* -? without operand */ | |
767 fprintf(stderr, | |
768 "Option -%c requires an operand\n", optopt); | |
769 errflg++; | |
770 break; | |
771 case '?': | |
772 fprintf(stderr, | |
773 "Unrecognised option: -%c\n", optopt); | |
774 errflg++; | |
775 } | |
776 } | |
777 | |
778 if (errflg) { | |
779 usage(1); | |
780 } | |
781 | |
782 nfiles = (argc-optind); | |
783 if( nfiles < 1 ){ | |
784 fprintf(stderr, "Please specify input archive name(s).\n"); | |
785 usage(1); | |
786 } | |
787 | |
788 for (ifile=0; ifile<nfiles; ifile++) { | |
789 HashTable *regn_hash; | |
790 char bases[] = "ACGTN"; | |
791 uint64_t base_count[5]; | |
792 char type[5]; | |
793 | |
794 input = argv[optind+ifile]; | |
795 printf("Reading archive %s.\n", input); | |
796 | |
797 for (iread=0; iread<NREADS; iread++) | |
798 read_count[iread] = 0; | |
799 | |
800 for (ichunk=0; ichunk<NCHUNKS; ichunk++) { | |
801 chunk_count[ichunk] = 0; | |
802 chunk_size[ichunk] = 0; | |
803 } | |
804 | |
805 for (ichunk=0; ichunk<NCHUNKS; ichunk++) | |
806 for (ikey=0; ikey<NKEYS; ikey++) | |
807 key_count[ichunk][ikey] = 0; | |
808 | |
809 for (ichunk=0; ichunk<NCHUNKS; ichunk++) | |
810 for (itype=0; itype<NTYPES; itype++) | |
811 type_count[ichunk][itype] = 0; | |
812 | |
813 if (NULL == (regn_hash = HashTableCreate(0, HASH_DYNAMIC_SIZE|HASH_FUNC_JENKINS3))) { | |
814 return 1; | |
815 } | |
816 | |
817 memset(base_count, 0, 5 * sizeof(uint64_t)); | |
818 | |
819 if( 0 == srf_info(input, level_mode, read_count, | |
820 chunk_count, chunk_size, | |
821 key_count, type_count, regn_hash, base_count) ){ | |
822 | |
823 /* read counts */ | |
824 if( level_mode & LEVEL_READ ) { | |
825 for (iread=0; iread<NREADS; iread++) { | |
826 if( read_count[iread] ) | |
827 printf("Reads: %s : %ld\n", read_str[iread], read_count[iread]); | |
828 } | |
829 } | |
830 | |
831 /* chunk, key and type counts */ | |
832 if( level_mode & LEVEL_CHUNK ) { | |
833 for (ichunk=0; ichunk<NCHUNKS; ichunk++) { | |
834 if( chunk_count[ichunk] ) { | |
835 printf("Chunk: %s : %ld %"PRId64"\n", | |
836 ZTR_BE2STR(chunk_type[ichunk], type), | |
837 chunk_count[ichunk], chunk_size[ichunk]); | |
838 for (ikey=0; ikey<NKEYS; ikey++) { | |
839 if(key_count[ichunk][ikey]) { | |
840 printf(" Mdata key: %s : %ld\n", keys_str[ikey], key_count[ichunk][ikey]); | |
841 if (ikey == KEY_TYPE && (ichunk == CHUNK_SAMP || ichunk == CHUNK_SMP4)) { | |
842 for (itype=0; itype<NTYPES; itype++) | |
843 if(type_count[ichunk][itype]) | |
844 printf(" types: %s : %ld\n", types_str[itype], type_count[ichunk][itype]); | |
845 } | |
846 if (ikey == KEY_NAME && (ichunk == CHUNK_REGN)) { | |
847 int ibucket; | |
848 for (ibucket=0; ibucket<regn_hash->nbuckets; ibucket++) { | |
849 HashItem *hi; | |
850 for (hi = regn_hash->bucket[ibucket]; hi; hi = hi->next) { | |
851 regn_t *regn = (regn_t *)hi->data.p; | |
852 printf(" %s x%d\n", hi->key, regn->count); | |
853 } | |
854 } | |
855 } | |
856 } | |
857 } | |
858 } | |
859 } | |
860 } | |
861 | |
862 /* base counts */ | |
863 if( level_mode & LEVEL_BASE ) { | |
864 uint64_t total = 0; | |
865 int i; | |
866 for (i=0; i<5; i++) { | |
867 if( base_count[i] ){ | |
868 printf("Bases: %c: %"PRId64"\n", | |
869 bases[i], base_count[i]); | |
870 total += base_count[i]; | |
871 } | |
872 } | |
873 printf("Bases: TOTAL: %"PRId64"\n", total); | |
874 } | |
875 } | |
876 } | |
877 | |
878 return 0; | |
879 } |