Mercurial > repos > dawe > srf2fastq

diff srf2fastq/io_lib-1.12.2/progs/srf_info.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author: dawe
date: Tue, 07 Jun 2011 17:48:05 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/srf2fastq/io_lib-1.12.2/progs/srf_info.c	Tue Jun 07 17:48:05 2011 -0400
@@ -0,0 +1,879 @@
+/*
+ * ======================================================================
+ * This software has been created by Genome Research Limited (GRL).
+ *
+ * GRL hereby grants permission to use, copy, modify and distribute
+ * this software and its documentation for non-commercial purposes
+ * without fee at the user's own risk on the basis set out below.
+ *
+ * GRL neither undertakes nor accepts any duty whether contractual or
+ * otherwise in connection with the software, its use or the use of
+ * any derivative, and makes no representations or warranties, express
+ * or implied, concerning the software, its suitability, fitness for
+ * a particular purpose or non-infringement.
+ *
+ * In no event shall the authors of the software or GRL be responsible
+ * or liable for any loss or damage whatsoever arising in any way
+ * directly or indirectly out of the use of this software or its
+ * derivatives, even if advised of the possibility of such damage.
+ *
+ * Our software can be freely distributed under the conditions set out
+ * above, and must contain this copyright notice.
+ * ======================================================================
+ */
+
+/*
+ * This performs a linear (non-indexed) search for a trace in an SRF archive.
+ *
+ * It's not intended as a suitable production program or as a library of code
+ * to use, but as a test and benchmark statistic.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <io_lib/Read.h>
+#include <io_lib/misc.h>
+#include <io_lib/ztr.h>
+#include <io_lib/srf.h>
+#include <io_lib/hash_table.h>
+
+#define LEVEL_READ  (1 << 0)
+#define LEVEL_CHUNK (1 << 1)
+#define LEVEL_NAME  (1 << 2)
+#define LEVEL_BASE  (1 << 3)
+#define LEVEL_ALL   (LEVEL_READ | LEVEL_CHUNK | LEVEL_NAME | LEVEL_BASE );
+
+/* only checks the first 10 traces */
+#define LEVEL_CHECK 255 
+
+#define READ_GOOD  0
+#define READ_BAD   1
+#define READ_TOTAL 2
+
+#define NREADS 3
+
+#define READ_GOOD_STR   "GOOD"
+#define READ_BAD_STR    "BAD"
+#define READ_TOTAL_STR  "TOTAL"
+
+/* see ztr.h for a list of all possible ztr chunk types */
+
+#define CHUNK_BASE 0
+#define CHUNK_CNF1 1
+#define CHUNK_CNF4 2
+#define CHUNK_SAMP 3
+#define CHUNK_SMP4 4
+#define CHUNK_REGN 5
+
+#define NCHUNKS 6
+
+#define CHUNK_BASE_TYPE ZTR_TYPE_BASE
+#define CHUNK_CNF1_TYPE ZTR_TYPE_CNF1
+#define CHUNK_CNF4_TYPE ZTR_TYPE_CNF4
+#define CHUNK_SAMP_TYPE ZTR_TYPE_SAMP
+#define CHUNK_SMP4_TYPE ZTR_TYPE_SMP4
+#define CHUNK_REGN_TYPE ZTR_TYPE_REGN
+
+#define KEY_TYPE    0
+#define KEY_VALTYPE 1
+#define KEY_GROUP   2
+#define KEY_OFFS    3
+#define KEY_SCALE   4
+#define KEY_COORD   5
+#define KEY_NAME    6
+
+#define NKEYS 7
+
+#define KEY_TYPE_STR    "TYPE"
+#define KEY_VALTYPE_STR "VALTYPE"
+#define KEY_GROUP_STR   "GROUP"
+#define KEY_OFFS_STR    "OFFS"
+#define KEY_SCALE_STR   "SCALE"
+#define KEY_COORD_STR   "COORD"
+#define KEY_NAME_STR    "NAME"
+
+#define TYPE_PROC 0
+#define TYPE_SLXI 1
+#define TYPE_SLXN 2
+#define TYPE_0FAM 3
+#define TYPE_1CY3 4
+#define TYPE_2TXR 5
+#define TYPE_3CY5 6
+
+#define NTYPES 7
+
+#define TYPE_PROC_STR "PROC"
+#define TYPE_SLXI_STR "SLXI"
+#define TYPE_SLXN_STR "SLXN"
+#define TYPE_0FAM_STR "0FAM"
+#define TYPE_1CY3_STR "1CY3"
+#define TYPE_2TXR_STR "2TXR"
+#define TYPE_3CY5_STR "3CY5"
+
+#define MAX_REGIONS   4
+
+/* regn chunk */
+typedef struct {
+    char coord;
+    char *region_names;
+    int nregions;
+    char *name[MAX_REGIONS];
+    char code[MAX_REGIONS];
+    int start[MAX_REGIONS];
+    int length[MAX_REGIONS];
+    int count;
+} regn_t;
+
+/* ------------------------------------------------------------------------ */
+/*
+ * Print usage message to stderr and exit with the given \"code\".
+ */
+void usage(int code) {
+    fprintf(stderr, "Usage: srf_info [-level level_bitmap] input(s)\n");
+    fprintf(stderr, "Options:\n");
+    fprintf(stderr, "    -l level_bitmap \n");
+    fprintf(stderr, "              1\tCount of good/bad reads.\n");
+    fprintf(stderr, "              2\tCounts for selected chunk types.\n");
+    fprintf(stderr, "              4\tTrace count and trace name prefix for each trace_header.\n");
+    fprintf(stderr, "              8\tBase count.\n");
+    fprintf(stderr, "\n");
+
+    exit(code);
+}
+
+/*
+ * Parse the REGN chunk, add to regn HASH
+ *
+ * Returns corresponding HashItem * from regn Hash
+ */
+HashItem *parse_regn(ztr_t *z, ztr_chunk_t *chunk, HashTable *regn_hash) {
+    char key[1024];
+    char *name;
+    HashItem *hi;
+    regn_t *regn;
+    size_t l;
+    
+    uncompress_chunk(z, chunk);
+
+    /* the hash key is a combination of the region names and boundaries */
+    name = ztr_lookup_mdata_value(z, chunk, "NAME");
+    l = snprintf(key, sizeof(key), "names=%s", name);
+    if( chunk->dlength ){
+        int nbndy = (chunk->dlength-1)/4;
+        uint4 *bndy = (uint4 *)(chunk->data+1);
+        int ibndy;
+	for (ibndy=0; ibndy<nbndy; ibndy++) {
+            if( ibndy )
+                l += snprintf(key + l, sizeof(key) - l,
+			      ";%d", be_int4(bndy[ibndy]));
+            else
+                l += snprintf(key + l, sizeof(key) - l,
+			      " boundaries=%d", be_int4(bndy[ibndy]));
+        }
+    }
+
+    if (NULL == (hi = (HashTableSearch(regn_hash, key, strlen(key))))) {
+        int iregion, nregions = 0;
+        char *coord;
+	char *cp1;
+        uint4 bndy[MAX_REGIONS];
+        int ibndy, nbndy = 0;
+        HashData hd;
+
+        if( NULL == (regn = (regn_t *)malloc(sizeof(regn_t)))) {
+	    return NULL;
+	}
+
+	coord = ztr_lookup_mdata_value(z, chunk, "COORD");
+	regn->coord = (NULL == coord ? 'B' : *coord );
+
+	regn->region_names = strdup(name);
+
+        cp1 = strtok (regn->region_names,";");
+        while(cp1) {
+            char *cp2;
+            if(NULL == (cp2 = strchr(cp1,':'))) {
+                fprintf(stderr, "Invalid region name/code pair %s\n", cp1);
+                return NULL;
+            }
+            *cp2++ = '\0';
+            regn->name[nregions] = cp1;
+            regn->code[nregions] = *cp2;
+            nregions++;
+            cp1 = strtok (NULL, ";");
+        }
+
+        regn->nregions = nregions;
+
+	if( chunk->dlength ) {
+            nbndy = (chunk->dlength-1)/4;
+            memcpy(bndy, chunk->data+1, chunk->dlength-1);
+	}
+
+        for( iregion=0, ibndy=0; iregion<nregions; iregion++) {
+            /* start = (start + length of previous region) or 0 if no previous region */
+            /* length = (next boundary - start of region) or -1 if no next boundary */
+            if( regn->code[iregion] == 'E' ){
+                /* no sequence, length = 0 */
+                regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0);
+                regn->length[iregion] = 0;
+            }else{
+                if( ibndy > nbndy ){
+                    fprintf(stderr, "More name/code pairs than boundaries\n");
+                    return NULL;
+                }
+                regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0);
+                regn->length[iregion] = (ibndy == nbndy ? -1 : (be_int4(bndy[ibndy])-regn->start[iregion]));
+                ibndy++;
+            }
+        }
+
+        regn->count = 1;
+            
+	hd.p = regn;
+	if (NULL == (hi = HashTableAdd(regn_hash, key, strlen(key), hd, NULL))) {
+	    free(regn->region_names);
+	    free(regn);
+	    return NULL;
+	}
+    } else {
+	regn = (regn_t *)(hi->data.p);
+	regn->count++;
+    }
+
+    return hi;
+}
+
+/*
+ * Parse the sequence
+ *
+ * Returns 0 on success.
+ */
+int parse_base(ztr_t *z, ztr_chunk_t *chunk, uint64_t *base_count) {
+    int i;
+    
+    uncompress_chunk(z, chunk);
+
+    for (i = 1; i < chunk->dlength; i++) {
+	char base = chunk->data[i];
+        uint1 key;
+	switch (base) {
+	case 'A': case 'a':
+            key = 0;
+            break;
+	case 'C': case 'c':
+            key = 1;
+            break;
+	case 'G': case 'g':
+            key = 2;
+            break;
+	case 'T': case 't':
+            key = 3;
+            break;
+	default:
+            key = 4;
+            break;
+	}
+        base_count[key]++;
+    }
+
+    return 0;
+}
+
+/*
+ * count the mdata keys
+ *
+ * Returns 0 on success.
+ */
+int count_mdata_keys(ztr_t *z, ztr_chunk_t *chunk, int ichunk, long key_count[NCHUNKS][NKEYS], long type_count[NCHUNKS][NTYPES]) {
+    char *keys_str[] = {KEY_TYPE_STR, KEY_VALTYPE_STR, KEY_GROUP_STR, KEY_OFFS_STR, KEY_SCALE_STR, KEY_COORD_STR, KEY_NAME_STR};
+    char *types_str[] = {TYPE_PROC_STR, TYPE_SLXI_STR, TYPE_SLXN_STR, TYPE_0FAM_STR, TYPE_1CY3_STR, TYPE_2TXR_STR, TYPE_3CY5_STR};
+    int ikey, itype;
+
+    if (z->header.version_major > 1 ||
+	z->header.version_minor >= 2) {
+	/* ZTR format 1.2 onwards */
+
+	char *cp = chunk->mdata;
+	int32_t dlen = chunk->mdlength;
+
+	/*
+	 * NB: we may wish to rewrite this using a dedicated state machine
+	 * instead of strlen/strcmp as this currently assumes the meta-
+	 * data is correctly formatted, which we cannot assume as the 
+	 * metadata is external and outside of our control.
+	 * Passing in non-nul terminated strings could crash this code.
+	 */
+	while (dlen > 0) {
+	    size_t l;
+
+	    /* key */
+	    l = strlen(cp);
+	    for (ikey=0; ikey<NKEYS; ikey++)
+		if(0 == strcmp(cp, keys_str[ikey]))
+		    break;
+
+	    cp += l+1;
+	    dlen -= l+1;
+
+	    /* value */
+	    if (ikey < NKEYS)
+		key_count[ichunk][ikey]++;
+
+	    /* for the type key check the value */
+	    if (ikey == KEY_TYPE && (ichunk == CHUNK_SAMP || ichunk == CHUNK_SMP4)) {
+		for (itype=0; itype<NTYPES; itype++)
+		    if(0 == strcmp(cp, types_str[itype]))
+			break;
+		if(itype < NTYPES)
+		    type_count[ichunk][itype]++;
+	    }
+
+	    l = strlen(cp);
+	    cp += l+1;
+	    dlen -= l+1;
+	}
+
+    } else {
+	/* v1.1 and before only supported a few types, specifically coded
+	 * per chunk type.
+	 */
+
+	switch (chunk->type) {
+	case ZTR_TYPE_SAMP:
+	case ZTR_TYPE_SMP4:
+	    key_count[ichunk][KEY_TYPE]++;
+	    for (itype=0; itype<NTYPES; itype++)
+		if(0 == strcmp(chunk->mdata, types_str[itype])) {
+		    type_count[ichunk][itype]++;
+		}
+	    break;
+
+	default:
+	    break;
+	}
+    }
+
+    return 0;
+}
+
+/*
+ * As per partial_decode_ztr in srf.c, but without uncompress_ztr.
+ */
+ztr_t *partial_decode_ztr2(srf_t *srf, mFILE *mf, ztr_t *z) {
+    ztr_t *ztr;
+    ztr_chunk_t *chunk;
+    long pos = 0;
+
+    if (z) {
+	/* Use existing ZTR object => already loaded header */
+	ztr = z;
+
+    } else {
+	/* Allocate or use existing ztr */
+	if (NULL == (ztr = new_ztr()))
+	    return NULL;
+
+	/* Read the header */
+	if (-1 == ztr_read_header(mf, &ztr->header)) {
+	    if (!z)
+		delete_ztr(ztr);
+	    mrewind(mf);
+	    return NULL;
+	}
+
+	/* Check magic number and version */
+	if (memcmp(ztr->header.magic, ZTR_MAGIC, 8) != 0) {
+	    if (!z)
+		delete_ztr(ztr);
+	    mrewind(mf);
+	    return NULL;
+	}
+
+	if (ztr->header.version_major != ZTR_VERSION_MAJOR) {
+	    if (!z)
+		delete_ztr(ztr);
+	    mrewind(mf);
+	    return NULL;
+	}
+    }
+
+    /* Load chunks */
+    pos = mftell(mf);
+    while (chunk = ztr_read_chunk_hdr(mf)) {
+	chunk->data = (char *)xmalloc(chunk->dlength);
+	if (chunk->dlength != mfread(chunk->data, 1, chunk->dlength, mf))
+	    break;
+
+	ztr->nchunks++;
+	ztr->chunk = (ztr_chunk_t *)xrealloc(ztr->chunk, ztr->nchunks *
+					     sizeof(ztr_chunk_t));
+	memcpy(&ztr->chunk[ztr->nchunks-1], chunk, sizeof(*chunk));
+	xfree(chunk);
+	pos = mftell(mf);
+    }
+
+    /*
+     * At this stage we're 'pos' into the mFILE mf with any remainder being
+     * a partial block.
+     */
+    if (0 == ztr->nchunks) {
+	if (!z)
+	    delete_ztr(ztr);
+	mrewind(mf);
+	return NULL;
+    }
+
+    /* Ensure we exit at the start of a ztr CHUNK */
+    mfseek(mf, pos, SEEK_SET);
+
+    /* If this is the header part, ensure we uncompress and init. data */
+    if (!z) {
+	/* Force caching of huffman code_sets */
+	ztr_find_hcode(ztr, CODE_USER);
+
+	/* And uncompress the rest */
+	uncompress_ztr(ztr);
+    }
+
+    return ztr;
+}
+
+/*
+ * Given the archive name and the level_mode
+ * generate information about the archive
+ *
+ * Note the generated srf file is NOT indexed
+ *
+ * Returns 0 on success.
+ */
+int srf_info(char *input, int level_mode, long *read_count, long *chunk_count,
+	     uint64_t *chunk_size, long key_count[NCHUNKS][NKEYS],
+	     long type_count[NCHUNKS][NTYPES], HashTable *regn_hash,
+	     uint64_t *base_count) {
+    srf_t *srf;
+    off_t pos;
+    int type;
+    int count = 0;
+    long trace_body_count = 0;
+    char name[1024];
+
+    if (NULL == (srf = srf_open(input, "rb"))) {
+	perror(input);
+	return 1;
+    }
+
+    while ((type = srf_next_block_type(srf)) >= 0) {
+
+      switch (type) {
+	case SRFB_CONTAINER:
+	    if( trace_body_count ){
+		if( level_mode & LEVEL_NAME )
+		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
+		trace_body_count = 0;
+	    }
+	    if (0 != srf_read_cont_hdr(srf, &srf->ch)) {
+		fprintf(stderr, "Error reading container header.\nExiting.\n");
+		exit(1);
+	    }
+	    break;
+
+        case SRFB_XML:
+	    if( trace_body_count ){
+		if( level_mode & LEVEL_NAME )
+		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
+		trace_body_count = 0;
+	    }
+	    if (0 != srf_read_xml(srf, &srf->xml)) {
+		fprintf(stderr, "Error reading XML.\nExiting.\n");
+		exit(1);
+	    }
+	    break;
+
+	case SRFB_TRACE_HEADER:
+	    if( trace_body_count ){
+		if( level_mode & LEVEL_NAME )
+		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
+		trace_body_count = 0;
+	    }
+	    if (0 != srf_read_trace_hdr(srf, &srf->th)) {
+		fprintf(stderr, "Error reading trace header.\nExiting.\n");
+		exit(1);
+	    }
+
+	    if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) )
+		break;
+
+	    /* Decode ZTR chunks in the header */
+	    if (srf->mf)
+		mfdestroy(srf->mf);
+
+	    srf->mf = mfcreate(NULL, 0);
+	    if (srf->th.trace_hdr_size)
+		mfwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, srf->mf);
+	    if (srf->ztr)
+		delete_ztr(srf->ztr);
+	    mrewind(srf->mf);
+
+	    if (NULL != (srf->ztr = partial_decode_ztr(srf, srf->mf, NULL))) {
+		srf->mf_pos = mftell(srf->mf);
+	    } else {
+		/* Maybe not enough to decode or no headerBlob. */
+		/* So delay until decoding the body. */
+		srf->mf_pos = 0;
+	    }
+	    mfseek(srf->mf, 0, SEEK_END);
+	    srf->mf_end = mftell(srf->mf);
+
+	    break;
+
+	case SRFB_TRACE_BODY: {
+	    srf_trace_body_t old_tb;
+	    ztr_t *ztr_tmp;
+            int no_trace = (level_mode & (LEVEL_CHUNK | LEVEL_BASE) ? 0 : 1);
+
+	    if (0 != srf_read_trace_body(srf, &old_tb, no_trace)) {
+		fprintf(stderr, "Error reading trace body.\nExiting.\n");
+		exit(1);
+	    }
+
+	    if (-1 == construct_trace_name(srf->th.id_prefix,
+					   (unsigned char *)old_tb.read_id,
+					   old_tb.read_id_length,
+					   name, 512)) {
+		fprintf(stderr, "Error constructing trace name.\nExiting.\n");
+		exit(1);
+	    }
+
+	    trace_body_count++;
+	    if( 1 == trace_body_count ){
+		if( level_mode & LEVEL_NAME )
+		    printf( "trace_name: %s + %s", srf->th.id_prefix, name+strlen(srf->th.id_prefix));
+	    }
+          
+	    read_count[READ_TOTAL]++;
+
+	    if (old_tb.flags & SRF_READ_FLAG_BAD_MASK ){
+		read_count[READ_BAD]++;
+	    } else {
+		read_count[READ_GOOD]++;
+	    }
+          
+	    if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) )
+		break;
+
+	    if (!srf->mf) {
+		fprintf(stderr, "Error reading trace body.\nExiting.\n");
+		exit(1);
+	    }
+
+	    mfseek(srf->mf, srf->mf_end, SEEK_SET);
+	    if (old_tb.trace_size) {
+		mfwrite(old_tb.trace, 1, old_tb.trace_size, srf->mf);
+		free(old_tb.trace);
+		old_tb.trace = NULL;
+	    }
+          
+	    mftruncate(srf->mf, mftell(srf->mf));
+	    mfseek(srf->mf, srf->mf_pos, SEEK_SET);
+
+	    if (srf->ztr)
+		ztr_tmp = ztr_dup(srf->ztr); /* inefficient, but simple */
+	    else
+		ztr_tmp = NULL;
+
+	    if ((ztr_tmp = partial_decode_ztr(srf, srf->mf, ztr_tmp))) {
+		int i;
+		for (i=0; i<ztr_tmp->nchunks; i++) {
+		    int ichunk = -1;
+
+		    switch (ztr_tmp->chunk[i].type) {
+		    case ZTR_TYPE_BASE:
+			ichunk = CHUNK_BASE;
+			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
+			if( parse_base(ztr_tmp, &ztr_tmp->chunk[i], base_count) ){
+			    delete_ztr(ztr_tmp);
+			    return 1;
+			}
+			break;
+		    case ZTR_TYPE_CNF1:
+			ichunk = CHUNK_CNF1;
+			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
+			break;
+		    case ZTR_TYPE_CNF4:
+			ichunk = CHUNK_CNF4;
+			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
+			break;
+		    case ZTR_TYPE_SAMP:
+			ichunk = CHUNK_SAMP;
+			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
+			break;
+		    case ZTR_TYPE_SMP4:
+			ichunk = CHUNK_SMP4;
+			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
+			break;
+		    case ZTR_TYPE_REGN:
+			ichunk = CHUNK_REGN;
+			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
+			if( NULL == parse_regn(ztr_tmp, &ztr_tmp->chunk[i], regn_hash) ){
+			    delete_ztr(ztr_tmp);
+			    return 1;
+			}
+			break;
+		    default:
+			break;
+		    }
+
+		    if( ichunk > -1 ) {
+			chunk_count[ichunk]++;
+			count_mdata_keys(ztr_tmp, &ztr_tmp->chunk[i], ichunk, key_count, type_count);
+		    }
+		}
+
+	    }
+
+	    if( ztr_tmp )
+		delete_ztr(ztr_tmp);
+
+	    count++;
+	    if( (level_mode == LEVEL_CHECK) && (count == 10) ){
+		printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
+		srf_destroy(srf, 1);
+		return 0;
+	    }
+          
+	    break;
+        }
+
+	case SRFB_INDEX: {
+            off_t pos = ftell(srf->fp);
+	    if( trace_body_count ){
+		if( level_mode & LEVEL_NAME )
+		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
+		trace_body_count = 0;
+	    }
+            printf( "Reading srf index block\n");
+	    if (0 != srf_read_index_hdr(srf, &srf->hdr, 1)) {
+		srf_destroy(srf, 1);
+		fprintf(stderr, "Error reading srf index block header.\nExiting.\n");
+		exit(1);
+	    }
+            /* Skip the index body */
+	    fseeko(srf->fp, pos + srf->hdr.size, SEEK_SET);
+
+            break;
+        }
+        
+	case SRFB_NULL_INDEX: {
+            uint64_t ilen;
+	    if( trace_body_count ){
+		if( level_mode & LEVEL_NAME )
+		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
+		trace_body_count = 0;
+	    }
+            printf( "Reading srf null index block\n");
+	    /*
+	     * Maybe the last 8 bytes of a the file (or previously was
+	     * last 8 bytes prior to concatenating SRF files together).
+	     * If so it's the index length and should always be 8 zeros.
+	     */
+            if (1 != fread(&ilen, 8, 1, srf->fp)) {
+		srf_destroy(srf, 1);
+		fprintf(stderr, "Error reading srf null index block.\nExiting.\n");
+		exit(1);
+            }
+            if (ilen != 0) {
+		srf_destroy(srf, 1);
+		fprintf(stderr, "Invalid srf null index block.\nExiting.\n");
+		exit(1);
+            }
+
+            break;
+        }
+        
+        default:
+            srf_destroy(srf, 1);
+	    fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type);
+	    exit(1);
+	}
+
+    }
+
+    if( trace_body_count ){
+        if( level_mode & LEVEL_NAME )
+            printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
+        trace_body_count = 0;
+    }
+
+    /* the type should be -1 (EOF) */
+    if( type != -1 ) {
+        fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type);
+	exit(1);
+    }
+
+    /* are we really at the end of the srf file */
+    pos = ftell(srf->fp);
+    fseek(srf->fp, 0, SEEK_END);
+    if( pos != ftell(srf->fp) ){
+        fprintf(stderr, "srf file is corrupt\n");
+	exit(1);
+    }
+    
+    srf_destroy(srf, 1);
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Main method.
+ */
+int main(int argc, char **argv) {
+    int ifile, nfiles;
+    char *input = NULL;
+
+    int c;
+    int errflg = 0;
+    extern char *optarg;
+    extern int optind, optopt;
+
+    int level_mode = LEVEL_ALL;
+
+    long read_count[NREADS];
+    char *read_str[] = {READ_GOOD_STR, READ_BAD_STR, READ_TOTAL_STR};
+    long chunk_count[NCHUNKS];
+    uint64_t chunk_size[NCHUNKS];
+    uint4 chunk_type[] = {CHUNK_BASE_TYPE, CHUNK_CNF1_TYPE, CHUNK_CNF4_TYPE, CHUNK_SAMP_TYPE, CHUNK_SMP4_TYPE, CHUNK_REGN_TYPE};
+    long key_count[NCHUNKS][NKEYS];
+    char *keys_str[] = {KEY_TYPE_STR, KEY_VALTYPE_STR, KEY_GROUP_STR, KEY_OFFS_STR, KEY_SCALE_STR, KEY_COORD_STR, KEY_NAME_STR};
+    long type_count[NCHUNKS][NTYPES];
+    char *types_str[] = {TYPE_PROC_STR, TYPE_SLXI_STR, TYPE_SLXN_STR, TYPE_0FAM_STR, TYPE_1CY3_STR, TYPE_2TXR_STR, TYPE_3CY5_STR};
+    int iread, ichunk, ikey, itype;
+
+    while ((c = getopt(argc, argv, "l:")) != -1) {
+        switch (c) {
+        case 'l':
+            if (1 != sscanf(optarg, "%d", &level_mode)) {
+                fprintf(stderr,
+                        "Otion -%c requires an operand\n", optopt);
+                errflg++;
+            }
+	    break;
+        case ':':       /* -? without operand */
+            fprintf(stderr,
+                    "Option -%c requires an operand\n", optopt);
+            errflg++;
+            break;
+        case '?':
+            fprintf(stderr,
+                    "Unrecognised option: -%c\n", optopt);
+            errflg++;
+        }
+    }
+
+    if (errflg) {
+	usage(1);
+    }
+
+    nfiles = (argc-optind);
+    if( nfiles < 1 ){
+        fprintf(stderr, "Please specify input archive name(s).\n");
+        usage(1);
+    }
+    
+    for (ifile=0; ifile<nfiles; ifile++) {
+        HashTable *regn_hash;
+        char bases[] = "ACGTN";
+        uint64_t base_count[5];
+        char type[5];
+
+        input = argv[optind+ifile];
+        printf("Reading archive %s.\n", input);
+
+        for (iread=0; iread<NREADS; iread++)
+	    read_count[iread] = 0;
+
+        for (ichunk=0; ichunk<NCHUNKS; ichunk++) {
+	    chunk_count[ichunk] = 0;
+	    chunk_size[ichunk] = 0;
+	}
+
+        for (ichunk=0; ichunk<NCHUNKS; ichunk++)
+            for (ikey=0; ikey<NKEYS; ikey++)
+                key_count[ichunk][ikey] = 0;
+
+        for (ichunk=0; ichunk<NCHUNKS; ichunk++)
+            for (itype=0; itype<NTYPES; itype++)
+                type_count[ichunk][itype] = 0;
+
+        if (NULL == (regn_hash = HashTableCreate(0, HASH_DYNAMIC_SIZE|HASH_FUNC_JENKINS3))) {
+	    return 1;
+        }
+    
+        memset(base_count, 0, 5 * sizeof(uint64_t));
+
+        if( 0 == srf_info(input, level_mode, read_count,
+			  chunk_count, chunk_size,
+			  key_count, type_count, regn_hash, base_count) ){
+
+            /* read counts */
+            if( level_mode & LEVEL_READ ) {
+                for (iread=0; iread<NREADS; iread++) {
+                    if( read_count[iread] )
+			printf("Reads: %s : %ld\n", read_str[iread], read_count[iread]);
+                }
+            }
+
+            /* chunk, key and type counts */
+            if( level_mode & LEVEL_CHUNK ) {
+                for (ichunk=0; ichunk<NCHUNKS; ichunk++) {
+                    if( chunk_count[ichunk] ) {
+                        printf("Chunk: %s : %ld %"PRId64"\n",
+			       ZTR_BE2STR(chunk_type[ichunk], type),
+			       chunk_count[ichunk], chunk_size[ichunk]);
+                        for (ikey=0; ikey<NKEYS; ikey++) {
+                            if(key_count[ichunk][ikey]) {
+                                printf("  Mdata key: %s : %ld\n", keys_str[ikey], key_count[ichunk][ikey]);
+                                if (ikey == KEY_TYPE && (ichunk == CHUNK_SAMP || ichunk == CHUNK_SMP4)) {
+                                    for (itype=0; itype<NTYPES; itype++)
+                                        if(type_count[ichunk][itype])
+                                            printf("    types: %s : %ld\n", types_str[itype], type_count[ichunk][itype]);
+                                }
+                                if (ikey == KEY_NAME && (ichunk == CHUNK_REGN)) {
+                                    int ibucket;
+                                    for (ibucket=0; ibucket<regn_hash->nbuckets; ibucket++) {
+                                        HashItem *hi;
+                                        for (hi = regn_hash->bucket[ibucket]; hi; hi = hi->next) {
+                                            regn_t *regn = (regn_t *)hi->data.p;
+                                            printf("    %s x%d\n", hi->key, regn->count);
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            /* base counts */
+            if( level_mode & LEVEL_BASE ) {
+                uint64_t total = 0;
+                int i;
+                for (i=0; i<5; i++) {
+                    if( base_count[i] ){
+                        printf("Bases: %c: %"PRId64"\n",
+			       bases[i], base_count[i]);
+                        total += base_count[i];
+                    }
+                }
+                printf("Bases: TOTAL: %"PRId64"\n", total);
+            }
+        }
+    }
+
+    return 0;
+}
author	dawe
date	Tue, 07 Jun 2011 17:48:05 -0400
parents
children