comparison srf2fastq/io_lib-1.12.2/progs/srf_info.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d901c9f41a6a
1 /*
2 * ======================================================================
3 * This software has been created by Genome Research Limited (GRL).
4 *
5 * GRL hereby grants permission to use, copy, modify and distribute
6 * this software and its documentation for non-commercial purposes
7 * without fee at the user's own risk on the basis set out below.
8 *
9 * GRL neither undertakes nor accepts any duty whether contractual or
10 * otherwise in connection with the software, its use or the use of
11 * any derivative, and makes no representations or warranties, express
12 * or implied, concerning the software, its suitability, fitness for
13 * a particular purpose or non-infringement.
14 *
15 * In no event shall the authors of the software or GRL be responsible
16 * or liable for any loss or damage whatsoever arising in any way
17 * directly or indirectly out of the use of this software or its
18 * derivatives, even if advised of the possibility of such damage.
19 *
20 * Our software can be freely distributed under the conditions set out
21 * above, and must contain this copyright notice.
22 * ======================================================================
23 */
24
25 /*
26 * This performs a linear (non-indexed) search for a trace in an SRF archive.
27 *
28 * It's not intended as a suitable production program or as a library of code
29 * to use, but as a test and benchmark statistic.
30 */
31
32 #include <string.h>
33 #include <stdio.h>
34 #include <unistd.h>
35 #include <ctype.h>
36 #include <sys/stat.h>
37 #include <sys/types.h>
38 #include <io_lib/Read.h>
39 #include <io_lib/misc.h>
40 #include <io_lib/ztr.h>
41 #include <io_lib/srf.h>
42 #include <io_lib/hash_table.h>
43
44 #define LEVEL_READ (1 << 0)
45 #define LEVEL_CHUNK (1 << 1)
46 #define LEVEL_NAME (1 << 2)
47 #define LEVEL_BASE (1 << 3)
48 #define LEVEL_ALL (LEVEL_READ | LEVEL_CHUNK | LEVEL_NAME | LEVEL_BASE );
49
50 /* only checks the first 10 traces */
51 #define LEVEL_CHECK 255
52
53 #define READ_GOOD 0
54 #define READ_BAD 1
55 #define READ_TOTAL 2
56
57 #define NREADS 3
58
59 #define READ_GOOD_STR "GOOD"
60 #define READ_BAD_STR "BAD"
61 #define READ_TOTAL_STR "TOTAL"
62
63 /* see ztr.h for a list of all possible ztr chunk types */
64
65 #define CHUNK_BASE 0
66 #define CHUNK_CNF1 1
67 #define CHUNK_CNF4 2
68 #define CHUNK_SAMP 3
69 #define CHUNK_SMP4 4
70 #define CHUNK_REGN 5
71
72 #define NCHUNKS 6
73
74 #define CHUNK_BASE_TYPE ZTR_TYPE_BASE
75 #define CHUNK_CNF1_TYPE ZTR_TYPE_CNF1
76 #define CHUNK_CNF4_TYPE ZTR_TYPE_CNF4
77 #define CHUNK_SAMP_TYPE ZTR_TYPE_SAMP
78 #define CHUNK_SMP4_TYPE ZTR_TYPE_SMP4
79 #define CHUNK_REGN_TYPE ZTR_TYPE_REGN
80
81 #define KEY_TYPE 0
82 #define KEY_VALTYPE 1
83 #define KEY_GROUP 2
84 #define KEY_OFFS 3
85 #define KEY_SCALE 4
86 #define KEY_COORD 5
87 #define KEY_NAME 6
88
89 #define NKEYS 7
90
91 #define KEY_TYPE_STR "TYPE"
92 #define KEY_VALTYPE_STR "VALTYPE"
93 #define KEY_GROUP_STR "GROUP"
94 #define KEY_OFFS_STR "OFFS"
95 #define KEY_SCALE_STR "SCALE"
96 #define KEY_COORD_STR "COORD"
97 #define KEY_NAME_STR "NAME"
98
99 #define TYPE_PROC 0
100 #define TYPE_SLXI 1
101 #define TYPE_SLXN 2
102 #define TYPE_0FAM 3
103 #define TYPE_1CY3 4
104 #define TYPE_2TXR 5
105 #define TYPE_3CY5 6
106
107 #define NTYPES 7
108
109 #define TYPE_PROC_STR "PROC"
110 #define TYPE_SLXI_STR "SLXI"
111 #define TYPE_SLXN_STR "SLXN"
112 #define TYPE_0FAM_STR "0FAM"
113 #define TYPE_1CY3_STR "1CY3"
114 #define TYPE_2TXR_STR "2TXR"
115 #define TYPE_3CY5_STR "3CY5"
116
117 #define MAX_REGIONS 4
118
119 /* regn chunk */
120 typedef struct {
121 char coord;
122 char *region_names;
123 int nregions;
124 char *name[MAX_REGIONS];
125 char code[MAX_REGIONS];
126 int start[MAX_REGIONS];
127 int length[MAX_REGIONS];
128 int count;
129 } regn_t;
130
131 /* ------------------------------------------------------------------------ */
132 /*
133 * Print usage message to stderr and exit with the given \"code\".
134 */
135 void usage(int code) {
136 fprintf(stderr, "Usage: srf_info [-level level_bitmap] input(s)\n");
137 fprintf(stderr, "Options:\n");
138 fprintf(stderr, " -l level_bitmap \n");
139 fprintf(stderr, " 1\tCount of good/bad reads.\n");
140 fprintf(stderr, " 2\tCounts for selected chunk types.\n");
141 fprintf(stderr, " 4\tTrace count and trace name prefix for each trace_header.\n");
142 fprintf(stderr, " 8\tBase count.\n");
143 fprintf(stderr, "\n");
144
145 exit(code);
146 }
147
148 /*
149 * Parse the REGN chunk, add to regn HASH
150 *
151 * Returns corresponding HashItem * from regn Hash
152 */
153 HashItem *parse_regn(ztr_t *z, ztr_chunk_t *chunk, HashTable *regn_hash) {
154 char key[1024];
155 char *name;
156 HashItem *hi;
157 regn_t *regn;
158 size_t l;
159
160 uncompress_chunk(z, chunk);
161
162 /* the hash key is a combination of the region names and boundaries */
163 name = ztr_lookup_mdata_value(z, chunk, "NAME");
164 l = snprintf(key, sizeof(key), "names=%s", name);
165 if( chunk->dlength ){
166 int nbndy = (chunk->dlength-1)/4;
167 uint4 *bndy = (uint4 *)(chunk->data+1);
168 int ibndy;
169 for (ibndy=0; ibndy<nbndy; ibndy++) {
170 if( ibndy )
171 l += snprintf(key + l, sizeof(key) - l,
172 ";%d", be_int4(bndy[ibndy]));
173 else
174 l += snprintf(key + l, sizeof(key) - l,
175 " boundaries=%d", be_int4(bndy[ibndy]));
176 }
177 }
178
179 if (NULL == (hi = (HashTableSearch(regn_hash, key, strlen(key))))) {
180 int iregion, nregions = 0;
181 char *coord;
182 char *cp1;
183 uint4 bndy[MAX_REGIONS];
184 int ibndy, nbndy = 0;
185 HashData hd;
186
187 if( NULL == (regn = (regn_t *)malloc(sizeof(regn_t)))) {
188 return NULL;
189 }
190
191 coord = ztr_lookup_mdata_value(z, chunk, "COORD");
192 regn->coord = (NULL == coord ? 'B' : *coord );
193
194 regn->region_names = strdup(name);
195
196 cp1 = strtok (regn->region_names,";");
197 while(cp1) {
198 char *cp2;
199 if(NULL == (cp2 = strchr(cp1,':'))) {
200 fprintf(stderr, "Invalid region name/code pair %s\n", cp1);
201 return NULL;
202 }
203 *cp2++ = '\0';
204 regn->name[nregions] = cp1;
205 regn->code[nregions] = *cp2;
206 nregions++;
207 cp1 = strtok (NULL, ";");
208 }
209
210 regn->nregions = nregions;
211
212 if( chunk->dlength ) {
213 nbndy = (chunk->dlength-1)/4;
214 memcpy(bndy, chunk->data+1, chunk->dlength-1);
215 }
216
217 for( iregion=0, ibndy=0; iregion<nregions; iregion++) {
218 /* start = (start + length of previous region) or 0 if no previous region */
219 /* length = (next boundary - start of region) or -1 if no next boundary */
220 if( regn->code[iregion] == 'E' ){
221 /* no sequence, length = 0 */
222 regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0);
223 regn->length[iregion] = 0;
224 }else{
225 if( ibndy > nbndy ){
226 fprintf(stderr, "More name/code pairs than boundaries\n");
227 return NULL;
228 }
229 regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0);
230 regn->length[iregion] = (ibndy == nbndy ? -1 : (be_int4(bndy[ibndy])-regn->start[iregion]));
231 ibndy++;
232 }
233 }
234
235 regn->count = 1;
236
237 hd.p = regn;
238 if (NULL == (hi = HashTableAdd(regn_hash, key, strlen(key), hd, NULL))) {
239 free(regn->region_names);
240 free(regn);
241 return NULL;
242 }
243 } else {
244 regn = (regn_t *)(hi->data.p);
245 regn->count++;
246 }
247
248 return hi;
249 }
250
251 /*
252 * Parse the sequence
253 *
254 * Returns 0 on success.
255 */
256 int parse_base(ztr_t *z, ztr_chunk_t *chunk, uint64_t *base_count) {
257 int i;
258
259 uncompress_chunk(z, chunk);
260
261 for (i = 1; i < chunk->dlength; i++) {
262 char base = chunk->data[i];
263 uint1 key;
264 switch (base) {
265 case 'A': case 'a':
266 key = 0;
267 break;
268 case 'C': case 'c':
269 key = 1;
270 break;
271 case 'G': case 'g':
272 key = 2;
273 break;
274 case 'T': case 't':
275 key = 3;
276 break;
277 default:
278 key = 4;
279 break;
280 }
281 base_count[key]++;
282 }
283
284 return 0;
285 }
286
287 /*
288 * count the mdata keys
289 *
290 * Returns 0 on success.
291 */
292 int count_mdata_keys(ztr_t *z, ztr_chunk_t *chunk, int ichunk, long key_count[NCHUNKS][NKEYS], long type_count[NCHUNKS][NTYPES]) {
293 char *keys_str[] = {KEY_TYPE_STR, KEY_VALTYPE_STR, KEY_GROUP_STR, KEY_OFFS_STR, KEY_SCALE_STR, KEY_COORD_STR, KEY_NAME_STR};
294 char *types_str[] = {TYPE_PROC_STR, TYPE_SLXI_STR, TYPE_SLXN_STR, TYPE_0FAM_STR, TYPE_1CY3_STR, TYPE_2TXR_STR, TYPE_3CY5_STR};
295 int ikey, itype;
296
297 if (z->header.version_major > 1 ||
298 z->header.version_minor >= 2) {
299 /* ZTR format 1.2 onwards */
300
301 char *cp = chunk->mdata;
302 int32_t dlen = chunk->mdlength;
303
304 /*
305 * NB: we may wish to rewrite this using a dedicated state machine
306 * instead of strlen/strcmp as this currently assumes the meta-
307 * data is correctly formatted, which we cannot assume as the
308 * metadata is external and outside of our control.
309 * Passing in non-nul terminated strings could crash this code.
310 */
311 while (dlen > 0) {
312 size_t l;
313
314 /* key */
315 l = strlen(cp);
316 for (ikey=0; ikey<NKEYS; ikey++)
317 if(0 == strcmp(cp, keys_str[ikey]))
318 break;
319
320 cp += l+1;
321 dlen -= l+1;
322
323 /* value */
324 if (ikey < NKEYS)
325 key_count[ichunk][ikey]++;
326
327 /* for the type key check the value */
328 if (ikey == KEY_TYPE && (ichunk == CHUNK_SAMP || ichunk == CHUNK_SMP4)) {
329 for (itype=0; itype<NTYPES; itype++)
330 if(0 == strcmp(cp, types_str[itype]))
331 break;
332 if(itype < NTYPES)
333 type_count[ichunk][itype]++;
334 }
335
336 l = strlen(cp);
337 cp += l+1;
338 dlen -= l+1;
339 }
340
341 } else {
342 /* v1.1 and before only supported a few types, specifically coded
343 * per chunk type.
344 */
345
346 switch (chunk->type) {
347 case ZTR_TYPE_SAMP:
348 case ZTR_TYPE_SMP4:
349 key_count[ichunk][KEY_TYPE]++;
350 for (itype=0; itype<NTYPES; itype++)
351 if(0 == strcmp(chunk->mdata, types_str[itype])) {
352 type_count[ichunk][itype]++;
353 }
354 break;
355
356 default:
357 break;
358 }
359 }
360
361 return 0;
362 }
363
364 /*
365 * As per partial_decode_ztr in srf.c, but without uncompress_ztr.
366 */
367 ztr_t *partial_decode_ztr2(srf_t *srf, mFILE *mf, ztr_t *z) {
368 ztr_t *ztr;
369 ztr_chunk_t *chunk;
370 long pos = 0;
371
372 if (z) {
373 /* Use existing ZTR object => already loaded header */
374 ztr = z;
375
376 } else {
377 /* Allocate or use existing ztr */
378 if (NULL == (ztr = new_ztr()))
379 return NULL;
380
381 /* Read the header */
382 if (-1 == ztr_read_header(mf, &ztr->header)) {
383 if (!z)
384 delete_ztr(ztr);
385 mrewind(mf);
386 return NULL;
387 }
388
389 /* Check magic number and version */
390 if (memcmp(ztr->header.magic, ZTR_MAGIC, 8) != 0) {
391 if (!z)
392 delete_ztr(ztr);
393 mrewind(mf);
394 return NULL;
395 }
396
397 if (ztr->header.version_major != ZTR_VERSION_MAJOR) {
398 if (!z)
399 delete_ztr(ztr);
400 mrewind(mf);
401 return NULL;
402 }
403 }
404
405 /* Load chunks */
406 pos = mftell(mf);
407 while (chunk = ztr_read_chunk_hdr(mf)) {
408 chunk->data = (char *)xmalloc(chunk->dlength);
409 if (chunk->dlength != mfread(chunk->data, 1, chunk->dlength, mf))
410 break;
411
412 ztr->nchunks++;
413 ztr->chunk = (ztr_chunk_t *)xrealloc(ztr->chunk, ztr->nchunks *
414 sizeof(ztr_chunk_t));
415 memcpy(&ztr->chunk[ztr->nchunks-1], chunk, sizeof(*chunk));
416 xfree(chunk);
417 pos = mftell(mf);
418 }
419
420 /*
421 * At this stage we're 'pos' into the mFILE mf with any remainder being
422 * a partial block.
423 */
424 if (0 == ztr->nchunks) {
425 if (!z)
426 delete_ztr(ztr);
427 mrewind(mf);
428 return NULL;
429 }
430
431 /* Ensure we exit at the start of a ztr CHUNK */
432 mfseek(mf, pos, SEEK_SET);
433
434 /* If this is the header part, ensure we uncompress and init. data */
435 if (!z) {
436 /* Force caching of huffman code_sets */
437 ztr_find_hcode(ztr, CODE_USER);
438
439 /* And uncompress the rest */
440 uncompress_ztr(ztr);
441 }
442
443 return ztr;
444 }
445
446 /*
447 * Given the archive name and the level_mode
448 * generate information about the archive
449 *
450 * Note the generated srf file is NOT indexed
451 *
452 * Returns 0 on success.
453 */
454 int srf_info(char *input, int level_mode, long *read_count, long *chunk_count,
455 uint64_t *chunk_size, long key_count[NCHUNKS][NKEYS],
456 long type_count[NCHUNKS][NTYPES], HashTable *regn_hash,
457 uint64_t *base_count) {
458 srf_t *srf;
459 off_t pos;
460 int type;
461 int count = 0;
462 long trace_body_count = 0;
463 char name[1024];
464
465 if (NULL == (srf = srf_open(input, "rb"))) {
466 perror(input);
467 return 1;
468 }
469
470 while ((type = srf_next_block_type(srf)) >= 0) {
471
472 switch (type) {
473 case SRFB_CONTAINER:
474 if( trace_body_count ){
475 if( level_mode & LEVEL_NAME )
476 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
477 trace_body_count = 0;
478 }
479 if (0 != srf_read_cont_hdr(srf, &srf->ch)) {
480 fprintf(stderr, "Error reading container header.\nExiting.\n");
481 exit(1);
482 }
483 break;
484
485 case SRFB_XML:
486 if( trace_body_count ){
487 if( level_mode & LEVEL_NAME )
488 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
489 trace_body_count = 0;
490 }
491 if (0 != srf_read_xml(srf, &srf->xml)) {
492 fprintf(stderr, "Error reading XML.\nExiting.\n");
493 exit(1);
494 }
495 break;
496
497 case SRFB_TRACE_HEADER:
498 if( trace_body_count ){
499 if( level_mode & LEVEL_NAME )
500 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
501 trace_body_count = 0;
502 }
503 if (0 != srf_read_trace_hdr(srf, &srf->th)) {
504 fprintf(stderr, "Error reading trace header.\nExiting.\n");
505 exit(1);
506 }
507
508 if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) )
509 break;
510
511 /* Decode ZTR chunks in the header */
512 if (srf->mf)
513 mfdestroy(srf->mf);
514
515 srf->mf = mfcreate(NULL, 0);
516 if (srf->th.trace_hdr_size)
517 mfwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, srf->mf);
518 if (srf->ztr)
519 delete_ztr(srf->ztr);
520 mrewind(srf->mf);
521
522 if (NULL != (srf->ztr = partial_decode_ztr(srf, srf->mf, NULL))) {
523 srf->mf_pos = mftell(srf->mf);
524 } else {
525 /* Maybe not enough to decode or no headerBlob. */
526 /* So delay until decoding the body. */
527 srf->mf_pos = 0;
528 }
529 mfseek(srf->mf, 0, SEEK_END);
530 srf->mf_end = mftell(srf->mf);
531
532 break;
533
534 case SRFB_TRACE_BODY: {
535 srf_trace_body_t old_tb;
536 ztr_t *ztr_tmp;
537 int no_trace = (level_mode & (LEVEL_CHUNK | LEVEL_BASE) ? 0 : 1);
538
539 if (0 != srf_read_trace_body(srf, &old_tb, no_trace)) {
540 fprintf(stderr, "Error reading trace body.\nExiting.\n");
541 exit(1);
542 }
543
544 if (-1 == construct_trace_name(srf->th.id_prefix,
545 (unsigned char *)old_tb.read_id,
546 old_tb.read_id_length,
547 name, 512)) {
548 fprintf(stderr, "Error constructing trace name.\nExiting.\n");
549 exit(1);
550 }
551
552 trace_body_count++;
553 if( 1 == trace_body_count ){
554 if( level_mode & LEVEL_NAME )
555 printf( "trace_name: %s + %s", srf->th.id_prefix, name+strlen(srf->th.id_prefix));
556 }
557
558 read_count[READ_TOTAL]++;
559
560 if (old_tb.flags & SRF_READ_FLAG_BAD_MASK ){
561 read_count[READ_BAD]++;
562 } else {
563 read_count[READ_GOOD]++;
564 }
565
566 if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) )
567 break;
568
569 if (!srf->mf) {
570 fprintf(stderr, "Error reading trace body.\nExiting.\n");
571 exit(1);
572 }
573
574 mfseek(srf->mf, srf->mf_end, SEEK_SET);
575 if (old_tb.trace_size) {
576 mfwrite(old_tb.trace, 1, old_tb.trace_size, srf->mf);
577 free(old_tb.trace);
578 old_tb.trace = NULL;
579 }
580
581 mftruncate(srf->mf, mftell(srf->mf));
582 mfseek(srf->mf, srf->mf_pos, SEEK_SET);
583
584 if (srf->ztr)
585 ztr_tmp = ztr_dup(srf->ztr); /* inefficient, but simple */
586 else
587 ztr_tmp = NULL;
588
589 if ((ztr_tmp = partial_decode_ztr(srf, srf->mf, ztr_tmp))) {
590 int i;
591 for (i=0; i<ztr_tmp->nchunks; i++) {
592 int ichunk = -1;
593
594 switch (ztr_tmp->chunk[i].type) {
595 case ZTR_TYPE_BASE:
596 ichunk = CHUNK_BASE;
597 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
598 if( parse_base(ztr_tmp, &ztr_tmp->chunk[i], base_count) ){
599 delete_ztr(ztr_tmp);
600 return 1;
601 }
602 break;
603 case ZTR_TYPE_CNF1:
604 ichunk = CHUNK_CNF1;
605 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
606 break;
607 case ZTR_TYPE_CNF4:
608 ichunk = CHUNK_CNF4;
609 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
610 break;
611 case ZTR_TYPE_SAMP:
612 ichunk = CHUNK_SAMP;
613 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
614 break;
615 case ZTR_TYPE_SMP4:
616 ichunk = CHUNK_SMP4;
617 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
618 break;
619 case ZTR_TYPE_REGN:
620 ichunk = CHUNK_REGN;
621 chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
622 if( NULL == parse_regn(ztr_tmp, &ztr_tmp->chunk[i], regn_hash) ){
623 delete_ztr(ztr_tmp);
624 return 1;
625 }
626 break;
627 default:
628 break;
629 }
630
631 if( ichunk > -1 ) {
632 chunk_count[ichunk]++;
633 count_mdata_keys(ztr_tmp, &ztr_tmp->chunk[i], ichunk, key_count, type_count);
634 }
635 }
636
637 }
638
639 if( ztr_tmp )
640 delete_ztr(ztr_tmp);
641
642 count++;
643 if( (level_mode == LEVEL_CHECK) && (count == 10) ){
644 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
645 srf_destroy(srf, 1);
646 return 0;
647 }
648
649 break;
650 }
651
652 case SRFB_INDEX: {
653 off_t pos = ftell(srf->fp);
654 if( trace_body_count ){
655 if( level_mode & LEVEL_NAME )
656 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
657 trace_body_count = 0;
658 }
659 printf( "Reading srf index block\n");
660 if (0 != srf_read_index_hdr(srf, &srf->hdr, 1)) {
661 srf_destroy(srf, 1);
662 fprintf(stderr, "Error reading srf index block header.\nExiting.\n");
663 exit(1);
664 }
665 /* Skip the index body */
666 fseeko(srf->fp, pos + srf->hdr.size, SEEK_SET);
667
668 break;
669 }
670
671 case SRFB_NULL_INDEX: {
672 uint64_t ilen;
673 if( trace_body_count ){
674 if( level_mode & LEVEL_NAME )
675 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
676 trace_body_count = 0;
677 }
678 printf( "Reading srf null index block\n");
679 /*
680 * Maybe the last 8 bytes of a the file (or previously was
681 * last 8 bytes prior to concatenating SRF files together).
682 * If so it's the index length and should always be 8 zeros.
683 */
684 if (1 != fread(&ilen, 8, 1, srf->fp)) {
685 srf_destroy(srf, 1);
686 fprintf(stderr, "Error reading srf null index block.\nExiting.\n");
687 exit(1);
688 }
689 if (ilen != 0) {
690 srf_destroy(srf, 1);
691 fprintf(stderr, "Invalid srf null index block.\nExiting.\n");
692 exit(1);
693 }
694
695 break;
696 }
697
698 default:
699 srf_destroy(srf, 1);
700 fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type);
701 exit(1);
702 }
703
704 }
705
706 if( trace_body_count ){
707 if( level_mode & LEVEL_NAME )
708 printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
709 trace_body_count = 0;
710 }
711
712 /* the type should be -1 (EOF) */
713 if( type != -1 ) {
714 fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type);
715 exit(1);
716 }
717
718 /* are we really at the end of the srf file */
719 pos = ftell(srf->fp);
720 fseek(srf->fp, 0, SEEK_END);
721 if( pos != ftell(srf->fp) ){
722 fprintf(stderr, "srf file is corrupt\n");
723 exit(1);
724 }
725
726 srf_destroy(srf, 1);
727 return 0;
728 }
729
730 /* ------------------------------------------------------------------------ */
731
732 /*
733 * Main method.
734 */
735 int main(int argc, char **argv) {
736 int ifile, nfiles;
737 char *input = NULL;
738
739 int c;
740 int errflg = 0;
741 extern char *optarg;
742 extern int optind, optopt;
743
744 int level_mode = LEVEL_ALL;
745
746 long read_count[NREADS];
747 char *read_str[] = {READ_GOOD_STR, READ_BAD_STR, READ_TOTAL_STR};
748 long chunk_count[NCHUNKS];
749 uint64_t chunk_size[NCHUNKS];
750 uint4 chunk_type[] = {CHUNK_BASE_TYPE, CHUNK_CNF1_TYPE, CHUNK_CNF4_TYPE, CHUNK_SAMP_TYPE, CHUNK_SMP4_TYPE, CHUNK_REGN_TYPE};
751 long key_count[NCHUNKS][NKEYS];
752 char *keys_str[] = {KEY_TYPE_STR, KEY_VALTYPE_STR, KEY_GROUP_STR, KEY_OFFS_STR, KEY_SCALE_STR, KEY_COORD_STR, KEY_NAME_STR};
753 long type_count[NCHUNKS][NTYPES];
754 char *types_str[] = {TYPE_PROC_STR, TYPE_SLXI_STR, TYPE_SLXN_STR, TYPE_0FAM_STR, TYPE_1CY3_STR, TYPE_2TXR_STR, TYPE_3CY5_STR};
755 int iread, ichunk, ikey, itype;
756
757 while ((c = getopt(argc, argv, "l:")) != -1) {
758 switch (c) {
759 case 'l':
760 if (1 != sscanf(optarg, "%d", &level_mode)) {
761 fprintf(stderr,
762 "Otion -%c requires an operand\n", optopt);
763 errflg++;
764 }
765 break;
766 case ':': /* -? without operand */
767 fprintf(stderr,
768 "Option -%c requires an operand\n", optopt);
769 errflg++;
770 break;
771 case '?':
772 fprintf(stderr,
773 "Unrecognised option: -%c\n", optopt);
774 errflg++;
775 }
776 }
777
778 if (errflg) {
779 usage(1);
780 }
781
782 nfiles = (argc-optind);
783 if( nfiles < 1 ){
784 fprintf(stderr, "Please specify input archive name(s).\n");
785 usage(1);
786 }
787
788 for (ifile=0; ifile<nfiles; ifile++) {
789 HashTable *regn_hash;
790 char bases[] = "ACGTN";
791 uint64_t base_count[5];
792 char type[5];
793
794 input = argv[optind+ifile];
795 printf("Reading archive %s.\n", input);
796
797 for (iread=0; iread<NREADS; iread++)
798 read_count[iread] = 0;
799
800 for (ichunk=0; ichunk<NCHUNKS; ichunk++) {
801 chunk_count[ichunk] = 0;
802 chunk_size[ichunk] = 0;
803 }
804
805 for (ichunk=0; ichunk<NCHUNKS; ichunk++)
806 for (ikey=0; ikey<NKEYS; ikey++)
807 key_count[ichunk][ikey] = 0;
808
809 for (ichunk=0; ichunk<NCHUNKS; ichunk++)
810 for (itype=0; itype<NTYPES; itype++)
811 type_count[ichunk][itype] = 0;
812
813 if (NULL == (regn_hash = HashTableCreate(0, HASH_DYNAMIC_SIZE|HASH_FUNC_JENKINS3))) {
814 return 1;
815 }
816
817 memset(base_count, 0, 5 * sizeof(uint64_t));
818
819 if( 0 == srf_info(input, level_mode, read_count,
820 chunk_count, chunk_size,
821 key_count, type_count, regn_hash, base_count) ){
822
823 /* read counts */
824 if( level_mode & LEVEL_READ ) {
825 for (iread=0; iread<NREADS; iread++) {
826 if( read_count[iread] )
827 printf("Reads: %s : %ld\n", read_str[iread], read_count[iread]);
828 }
829 }
830
831 /* chunk, key and type counts */
832 if( level_mode & LEVEL_CHUNK ) {
833 for (ichunk=0; ichunk<NCHUNKS; ichunk++) {
834 if( chunk_count[ichunk] ) {
835 printf("Chunk: %s : %ld %"PRId64"\n",
836 ZTR_BE2STR(chunk_type[ichunk], type),
837 chunk_count[ichunk], chunk_size[ichunk]);
838 for (ikey=0; ikey<NKEYS; ikey++) {
839 if(key_count[ichunk][ikey]) {
840 printf(" Mdata key: %s : %ld\n", keys_str[ikey], key_count[ichunk][ikey]);
841 if (ikey == KEY_TYPE && (ichunk == CHUNK_SAMP || ichunk == CHUNK_SMP4)) {
842 for (itype=0; itype<NTYPES; itype++)
843 if(type_count[ichunk][itype])
844 printf(" types: %s : %ld\n", types_str[itype], type_count[ichunk][itype]);
845 }
846 if (ikey == KEY_NAME && (ichunk == CHUNK_REGN)) {
847 int ibucket;
848 for (ibucket=0; ibucket<regn_hash->nbuckets; ibucket++) {
849 HashItem *hi;
850 for (hi = regn_hash->bucket[ibucket]; hi; hi = hi->next) {
851 regn_t *regn = (regn_t *)hi->data.p;
852 printf(" %s x%d\n", hi->key, regn->count);
853 }
854 }
855 }
856 }
857 }
858 }
859 }
860 }
861
862 /* base counts */
863 if( level_mode & LEVEL_BASE ) {
864 uint64_t total = 0;
865 int i;
866 for (i=0; i<5; i++) {
867 if( base_count[i] ){
868 printf("Bases: %c: %"PRId64"\n",
869 bases[i], base_count[i]);
870 total += base_count[i];
871 }
872 }
873 printf("Bases: TOTAL: %"PRId64"\n", total);
874 }
875 }
876 }
877
878 return 0;
879 }