srf2fastq: srf2fastq/io_lib-1.12.2/progs/srf

comparison srf2fastq/io_lib-1.12.2/progs/srf_info.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository

author	dawe
date	Tue, 07 Jun 2011 17:48:05 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:d901c9f41a6a
+/*
+* ======================================================================
+* This software has been created by Genome Research Limited (GRL).
+*
+* GRL hereby grants permission to use, copy, modify and distribute
+* this software and its documentation for non-commercial purposes
+* without fee at the user's own risk on the basis set out below.
+*
+* GRL neither undertakes nor accepts any duty whether contractual or
+* otherwise in connection with the software, its use or the use of
+* any derivative, and makes no representations or warranties, express
+* or implied, concerning the software, its suitability, fitness for
+* a particular purpose or non-infringement.
+*
+* In no event shall the authors of the software or GRL be responsible
+* or liable for any loss or damage whatsoever arising in any way
+* directly or indirectly out of the use of this software or its
+* derivatives, even if advised of the possibility of such damage.
+*
+* Our software can be freely distributed under the conditions set out
+* above, and must contain this copyright notice.
+* ======================================================================
+*/
+/*
+* This performs a linear (non-indexed) search for a trace in an SRF archive.
+*
+* It's not intended as a suitable production program or as a library of code
+* to use, but as a test and benchmark statistic.
+*/
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <io_lib/Read.h>
+#include <io_lib/misc.h>
+#include <io_lib/ztr.h>
+#include <io_lib/srf.h>
+#include <io_lib/hash_table.h>
+#define LEVEL_READ  (1 << 0)
+#define LEVEL_CHUNK (1 << 1)
+#define LEVEL_NAME  (1 << 2)
+#define LEVEL_BASE  (1 << 3)
+#define LEVEL_ALL   (LEVEL_READ | LEVEL_CHUNK | LEVEL_NAME | LEVEL_BASE );
+/* only checks the first 10 traces */
+#define LEVEL_CHECK 255
+#define READ_GOOD  0
+#define READ_BAD   1
+#define READ_TOTAL 2
+#define NREADS 3
+#define READ_GOOD_STR   "GOOD"
+#define READ_BAD_STR    "BAD"
+#define READ_TOTAL_STR  "TOTAL"
+/* see ztr.h for a list of all possible ztr chunk types */
+#define CHUNK_BASE 0
+#define CHUNK_CNF1 1
+#define CHUNK_CNF4 2
+#define CHUNK_SAMP 3
+#define CHUNK_SMP4 4
+#define CHUNK_REGN 5
+#define NCHUNKS 6
+#define CHUNK_BASE_TYPE ZTR_TYPE_BASE
+#define CHUNK_CNF1_TYPE ZTR_TYPE_CNF1
+#define CHUNK_CNF4_TYPE ZTR_TYPE_CNF4
+#define CHUNK_SAMP_TYPE ZTR_TYPE_SAMP
+#define CHUNK_SMP4_TYPE ZTR_TYPE_SMP4
+#define CHUNK_REGN_TYPE ZTR_TYPE_REGN
+#define KEY_TYPE    0
+#define KEY_VALTYPE 1
+#define KEY_GROUP   2
+#define KEY_OFFS    3
+#define KEY_SCALE   4
+#define KEY_COORD   5
+#define KEY_NAME    6
+#define NKEYS 7
+#define KEY_TYPE_STR    "TYPE"
+#define KEY_VALTYPE_STR "VALTYPE"
+#define KEY_GROUP_STR   "GROUP"
+#define KEY_OFFS_STR    "OFFS"
+#define KEY_SCALE_STR   "SCALE"
+#define KEY_COORD_STR   "COORD"
+#define KEY_NAME_STR    "NAME"
+#define TYPE_PROC 0
+#define TYPE_SLXI 1
+#define TYPE_SLXN 2
+#define TYPE_0FAM 3
+#define TYPE_1CY3 4
+#define TYPE_2TXR 5
+#define TYPE_3CY5 6
+#define NTYPES 7
+#define TYPE_PROC_STR "PROC"
+#define TYPE_SLXI_STR "SLXI"
+#define TYPE_SLXN_STR "SLXN"
+#define TYPE_0FAM_STR "0FAM"
+#define TYPE_1CY3_STR "1CY3"
+#define TYPE_2TXR_STR "2TXR"
+#define TYPE_3CY5_STR "3CY5"
+#define MAX_REGIONS   4
+/* regn chunk */
+typedef struct {
+char coord;
+char *region_names;
+int nregions;
+char *name[MAX_REGIONS];
+char code[MAX_REGIONS];
+int start[MAX_REGIONS];
+int length[MAX_REGIONS];
+int count;
+} regn_t;
+/* ------------------------------------------------------------------------ */
+/*
+* Print usage message to stderr and exit with the given \"code\".
+*/
+void usage(int code) {
+fprintf(stderr, "Usage: srf_info [-level level_bitmap] input(s)\n");
+fprintf(stderr, "Options:\n");
+fprintf(stderr, "    -l level_bitmap \n");
+fprintf(stderr, "              1\tCount of good/bad reads.\n");
+fprintf(stderr, "              2\tCounts for selected chunk types.\n");
+fprintf(stderr, "              4\tTrace count and trace name prefix for each trace_header.\n");
+fprintf(stderr, "              8\tBase count.\n");
+fprintf(stderr, "\n");
+exit(code);
+}
+/*
+* Parse the REGN chunk, add to regn HASH
+*
+* Returns corresponding HashItem * from regn Hash
+*/
+HashItem *parse_regn(ztr_t *z, ztr_chunk_t *chunk, HashTable *regn_hash) {
+char key[1024];
+char *name;
+HashItem *hi;
+regn_t *regn;
+size_t l;
+uncompress_chunk(z, chunk);
+/* the hash key is a combination of the region names and boundaries */
+name = ztr_lookup_mdata_value(z, chunk, "NAME");
+l = snprintf(key, sizeof(key), "names=%s", name);
+if( chunk->dlength ){
+int nbndy = (chunk->dlength-1)/4;
+uint4 *bndy = (uint4 *)(chunk->data+1);
+int ibndy;
+	for (ibndy=0; ibndy<nbndy; ibndy++) {
+if( ibndy )
+l += snprintf(key + l, sizeof(key) - l,
+			      ";%d", be_int4(bndy[ibndy]));
+else
+l += snprintf(key + l, sizeof(key) - l,
+			      " boundaries=%d", be_int4(bndy[ibndy]));
+}
+}
+if (NULL == (hi = (HashTableSearch(regn_hash, key, strlen(key))))) {
+int iregion, nregions = 0;
+char *coord;
+	char *cp1;
+uint4 bndy[MAX_REGIONS];
+int ibndy, nbndy = 0;
+HashData hd;
+if( NULL == (regn = (regn_t *)malloc(sizeof(regn_t)))) {
+	    return NULL;
+	}
+	coord = ztr_lookup_mdata_value(z, chunk, "COORD");
+	regn->coord = (NULL == coord ? 'B' : *coord );
+	regn->region_names = strdup(name);
+cp1 = strtok (regn->region_names,";");
+while(cp1) {
+char *cp2;
+if(NULL == (cp2 = strchr(cp1,':'))) {
+fprintf(stderr, "Invalid region name/code pair %s\n", cp1);
+return NULL;
+}
+*cp2++ = '\0';
+regn->name[nregions] = cp1;
+regn->code[nregions] = *cp2;
+nregions++;
+cp1 = strtok (NULL, ";");
+}
+regn->nregions = nregions;
+	if( chunk->dlength ) {
+nbndy = (chunk->dlength-1)/4;
+memcpy(bndy, chunk->data+1, chunk->dlength-1);
+	}
+for( iregion=0, ibndy=0; iregion<nregions; iregion++) {
+/* start = (start + length of previous region) or 0 if no previous region */
+/* length = (next boundary - start of region) or -1 if no next boundary */
+if( regn->code[iregion] == 'E' ){
+/* no sequence, length = 0 */
+regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0);
+regn->length[iregion] = 0;
+}else{
+if( ibndy > nbndy ){
+fprintf(stderr, "More name/code pairs than boundaries\n");
+return NULL;
+}
+regn->start[iregion] = (iregion ? (regn->start[iregion-1] + regn->length[iregion-1]) : 0);
+regn->length[iregion] = (ibndy == nbndy ? -1 : (be_int4(bndy[ibndy])-regn->start[iregion]));
+ibndy++;
+}
+}
+regn->count = 1;
+	hd.p = regn;
+	if (NULL == (hi = HashTableAdd(regn_hash, key, strlen(key), hd, NULL))) {
+	    free(regn->region_names);
+	    free(regn);
+	    return NULL;
+	}
+} else {
+	regn = (regn_t *)(hi->data.p);
+	regn->count++;
+}
+return hi;
+}
+/*
+* Parse the sequence
+*
+* Returns 0 on success.
+*/
+int parse_base(ztr_t *z, ztr_chunk_t *chunk, uint64_t *base_count) {
+int i;
+uncompress_chunk(z, chunk);
+for (i = 1; i < chunk->dlength; i++) {
+	char base = chunk->data[i];
+uint1 key;
+	switch (base) {
+	case 'A': case 'a':
+key = 0;
+break;
+	case 'C': case 'c':
+key = 1;
+break;
+	case 'G': case 'g':
+key = 2;
+break;
+	case 'T': case 't':
+key = 3;
+break;
+	default:
+key = 4;
+break;
+	}
+base_count[key]++;
+}
+return 0;
+}
+/*
+* count the mdata keys
+*
+* Returns 0 on success.
+*/
+int count_mdata_keys(ztr_t *z, ztr_chunk_t *chunk, int ichunk, long key_count[NCHUNKS][NKEYS], long type_count[NCHUNKS][NTYPES]) {
+char *keys_str[] = {KEY_TYPE_STR, KEY_VALTYPE_STR, KEY_GROUP_STR, KEY_OFFS_STR, KEY_SCALE_STR, KEY_COORD_STR, KEY_NAME_STR};
+char *types_str[] = {TYPE_PROC_STR, TYPE_SLXI_STR, TYPE_SLXN_STR, TYPE_0FAM_STR, TYPE_1CY3_STR, TYPE_2TXR_STR, TYPE_3CY5_STR};
+int ikey, itype;
+if (z->header.version_major > 1 ||
+	z->header.version_minor >= 2) {
+	/* ZTR format 1.2 onwards */
+	char *cp = chunk->mdata;
+	int32_t dlen = chunk->mdlength;
+	/*
+	 * NB: we may wish to rewrite this using a dedicated state machine
+	 * instead of strlen/strcmp as this currently assumes the meta-
+	 * data is correctly formatted, which we cannot assume as the
+	 * metadata is external and outside of our control.
+	 * Passing in non-nul terminated strings could crash this code.
+	 */
+	while (dlen > 0) {
+	    size_t l;
+	    /* key */
+	    l = strlen(cp);
+	    for (ikey=0; ikey<NKEYS; ikey++)
+		if(0 == strcmp(cp, keys_str[ikey]))
+		    break;
+	    cp += l+1;
+	    dlen -= l+1;
+	    /* value */
+	    if (ikey < NKEYS)
+		key_count[ichunk][ikey]++;
+	    /* for the type key check the value */
+	    if (ikey == KEY_TYPE && (ichunk == CHUNK_SAMP || ichunk == CHUNK_SMP4)) {
+		for (itype=0; itype<NTYPES; itype++)
+		    if(0 == strcmp(cp, types_str[itype]))
+			break;
+		if(itype < NTYPES)
+		    type_count[ichunk][itype]++;
+	    }
+	    l = strlen(cp);
+	    cp += l+1;
+	    dlen -= l+1;
+	}
+} else {
+	/* v1.1 and before only supported a few types, specifically coded
+	 * per chunk type.
+	 */
+	switch (chunk->type) {
+	case ZTR_TYPE_SAMP:
+	case ZTR_TYPE_SMP4:
+	    key_count[ichunk][KEY_TYPE]++;
+	    for (itype=0; itype<NTYPES; itype++)
+		if(0 == strcmp(chunk->mdata, types_str[itype])) {
+		    type_count[ichunk][itype]++;
+		}
+	    break;
+	default:
+	    break;
+	}
+}
+return 0;
+}
+/*
+* As per partial_decode_ztr in srf.c, but without uncompress_ztr.
+*/
+ztr_t *partial_decode_ztr2(srf_t *srf, mFILE *mf, ztr_t *z) {
+ztr_t *ztr;
+ztr_chunk_t *chunk;
+long pos = 0;
+if (z) {
+	/* Use existing ZTR object => already loaded header */
+	ztr = z;
+} else {
+	/* Allocate or use existing ztr */
+	if (NULL == (ztr = new_ztr()))
+	    return NULL;
+	/* Read the header */
+	if (-1 == ztr_read_header(mf, &ztr->header)) {
+	    if (!z)
+		delete_ztr(ztr);
+	    mrewind(mf);
+	    return NULL;
+	}
+	/* Check magic number and version */
+	if (memcmp(ztr->header.magic, ZTR_MAGIC, 8) != 0) {
+	    if (!z)
+		delete_ztr(ztr);
+	    mrewind(mf);
+	    return NULL;
+	}
+	if (ztr->header.version_major != ZTR_VERSION_MAJOR) {
+	    if (!z)
+		delete_ztr(ztr);
+	    mrewind(mf);
+	    return NULL;
+	}
+}
+/* Load chunks */
+pos = mftell(mf);
+while (chunk = ztr_read_chunk_hdr(mf)) {
+	chunk->data = (char *)xmalloc(chunk->dlength);
+	if (chunk->dlength != mfread(chunk->data, 1, chunk->dlength, mf))
+	    break;
+	ztr->nchunks++;
+	ztr->chunk = (ztr_chunk_t *)xrealloc(ztr->chunk, ztr->nchunks *
+					     sizeof(ztr_chunk_t));
+	memcpy(&ztr->chunk[ztr->nchunks-1], chunk, sizeof(*chunk));
+	xfree(chunk);
+	pos = mftell(mf);
+}
+/*
+* At this stage we're 'pos' into the mFILE mf with any remainder being
+* a partial block.
+*/
+if (0 == ztr->nchunks) {
+	if (!z)
+	    delete_ztr(ztr);
+	mrewind(mf);
+	return NULL;
+}
+/* Ensure we exit at the start of a ztr CHUNK */
+mfseek(mf, pos, SEEK_SET);
+/* If this is the header part, ensure we uncompress and init. data */
+if (!z) {
+	/* Force caching of huffman code_sets */
+	ztr_find_hcode(ztr, CODE_USER);
+	/* And uncompress the rest */
+	uncompress_ztr(ztr);
+}
+return ztr;
+}
+/*
+* Given the archive name and the level_mode
+* generate information about the archive
+*
+* Note the generated srf file is NOT indexed
+*
+* Returns 0 on success.
+*/
+int srf_info(char *input, int level_mode, long *read_count, long *chunk_count,
+	     uint64_t *chunk_size, long key_count[NCHUNKS][NKEYS],
+	     long type_count[NCHUNKS][NTYPES], HashTable *regn_hash,
+	     uint64_t *base_count) {
+srf_t *srf;
+off_t pos;
+int type;
+int count = 0;
+long trace_body_count = 0;
+char name[1024];
+if (NULL == (srf = srf_open(input, "rb"))) {
+	perror(input);
+	return 1;
+}
+while ((type = srf_next_block_type(srf)) >= 0) {
+switch (type) {
+	case SRFB_CONTAINER:
+	    if( trace_body_count ){
+		if( level_mode & LEVEL_NAME )
+		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
+		trace_body_count = 0;
+	    }
+	    if (0 != srf_read_cont_hdr(srf, &srf->ch)) {
+		fprintf(stderr, "Error reading container header.\nExiting.\n");
+		exit(1);
+	    }
+	    break;
+case SRFB_XML:
+	    if( trace_body_count ){
+		if( level_mode & LEVEL_NAME )
+		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
+		trace_body_count = 0;
+	    }
+	    if (0 != srf_read_xml(srf, &srf->xml)) {
+		fprintf(stderr, "Error reading XML.\nExiting.\n");
+		exit(1);
+	    }
+	    break;
+	case SRFB_TRACE_HEADER:
+	    if( trace_body_count ){
+		if( level_mode & LEVEL_NAME )
+		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
+		trace_body_count = 0;
+	    }
+	    if (0 != srf_read_trace_hdr(srf, &srf->th)) {
+		fprintf(stderr, "Error reading trace header.\nExiting.\n");
+		exit(1);
+	    }
+	    if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) )
+		break;
+	    /* Decode ZTR chunks in the header */
+	    if (srf->mf)
+		mfdestroy(srf->mf);
+	    srf->mf = mfcreate(NULL, 0);
+	    if (srf->th.trace_hdr_size)
+		mfwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, srf->mf);
+	    if (srf->ztr)
+		delete_ztr(srf->ztr);
+	    mrewind(srf->mf);
+	    if (NULL != (srf->ztr = partial_decode_ztr(srf, srf->mf, NULL))) {
+		srf->mf_pos = mftell(srf->mf);
+	    } else {
+		/* Maybe not enough to decode or no headerBlob. */
+		/* So delay until decoding the body. */
+		srf->mf_pos = 0;
+	    }
+	    mfseek(srf->mf, 0, SEEK_END);
+	    srf->mf_end = mftell(srf->mf);
+	    break;
+	case SRFB_TRACE_BODY: {
+	    srf_trace_body_t old_tb;
+	    ztr_t *ztr_tmp;
+int no_trace = (level_mode & (LEVEL_CHUNK | LEVEL_BASE) ? 0 : 1);
+	    if (0 != srf_read_trace_body(srf, &old_tb, no_trace)) {
+		fprintf(stderr, "Error reading trace body.\nExiting.\n");
+		exit(1);
+	    }
+	    if (-1 == construct_trace_name(srf->th.id_prefix,
+					   (unsigned char *)old_tb.read_id,
+					   old_tb.read_id_length,
+					   name, 512)) {
+		fprintf(stderr, "Error constructing trace name.\nExiting.\n");
+		exit(1);
+	    }
+	    trace_body_count++;
+	    if( 1 == trace_body_count ){
+		if( level_mode & LEVEL_NAME )
+		    printf( "trace_name: %s + %s", srf->th.id_prefix, name+strlen(srf->th.id_prefix));
+	    }
+	    read_count[READ_TOTAL]++;
+	    if (old_tb.flags & SRF_READ_FLAG_BAD_MASK ){
+		read_count[READ_BAD]++;
+	    } else {
+		read_count[READ_GOOD]++;
+	    }
+	    if( 0 == (level_mode & (LEVEL_CHUNK | LEVEL_BASE)) )
+		break;
+	    if (!srf->mf) {
+		fprintf(stderr, "Error reading trace body.\nExiting.\n");
+		exit(1);
+	    }
+	    mfseek(srf->mf, srf->mf_end, SEEK_SET);
+	    if (old_tb.trace_size) {
+		mfwrite(old_tb.trace, 1, old_tb.trace_size, srf->mf);
+		free(old_tb.trace);
+		old_tb.trace = NULL;
+	    }
+	    mftruncate(srf->mf, mftell(srf->mf));
+	    mfseek(srf->mf, srf->mf_pos, SEEK_SET);
+	    if (srf->ztr)
+		ztr_tmp = ztr_dup(srf->ztr); /* inefficient, but simple */
+	    else
+		ztr_tmp = NULL;
+	    if ((ztr_tmp = partial_decode_ztr(srf, srf->mf, ztr_tmp))) {
+		int i;
+		for (i=0; i<ztr_tmp->nchunks; i++) {
+		    int ichunk = -1;
+		    switch (ztr_tmp->chunk[i].type) {
+		    case ZTR_TYPE_BASE:
+			ichunk = CHUNK_BASE;
+			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
+			if( parse_base(ztr_tmp, &ztr_tmp->chunk[i], base_count) ){
+			    delete_ztr(ztr_tmp);
+			    return 1;
+			}
+			break;
+		    case ZTR_TYPE_CNF1:
+			ichunk = CHUNK_CNF1;
+			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
+			break;
+		    case ZTR_TYPE_CNF4:
+			ichunk = CHUNK_CNF4;
+			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
+			break;
+		    case ZTR_TYPE_SAMP:
+			ichunk = CHUNK_SAMP;
+			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
+			break;
+		    case ZTR_TYPE_SMP4:
+			ichunk = CHUNK_SMP4;
+			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
+			break;
+		    case ZTR_TYPE_REGN:
+			ichunk = CHUNK_REGN;
+			chunk_size[ichunk] += ztr_tmp->chunk[i].dlength;
+			if( NULL == parse_regn(ztr_tmp, &ztr_tmp->chunk[i], regn_hash) ){
+			    delete_ztr(ztr_tmp);
+			    return 1;
+			}
+			break;
+		    default:
+			break;
+		    }
+		    if( ichunk > -1 ) {
+			chunk_count[ichunk]++;
+			count_mdata_keys(ztr_tmp, &ztr_tmp->chunk[i], ichunk, key_count, type_count);
+		    }
+		}
+	    }
+	    if( ztr_tmp )
+		delete_ztr(ztr_tmp);
+	    count++;
+	    if( (level_mode == LEVEL_CHECK) && (count == 10) ){
+		printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
+		srf_destroy(srf, 1);
+		return 0;
+	    }
+	    break;
+}
+	case SRFB_INDEX: {
+off_t pos = ftell(srf->fp);
+	    if( trace_body_count ){
+		if( level_mode & LEVEL_NAME )
+		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
+		trace_body_count = 0;
+	    }
+printf( "Reading srf index block\n");
+	    if (0 != srf_read_index_hdr(srf, &srf->hdr, 1)) {
+		srf_destroy(srf, 1);
+		fprintf(stderr, "Error reading srf index block header.\nExiting.\n");
+		exit(1);
+	    }
+/* Skip the index body */
+	    fseeko(srf->fp, pos + srf->hdr.size, SEEK_SET);
+break;
+}
+	case SRFB_NULL_INDEX: {
+uint64_t ilen;
+	    if( trace_body_count ){
+		if( level_mode & LEVEL_NAME )
+		    printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
+		trace_body_count = 0;
+	    }
+printf( "Reading srf null index block\n");
+	    /*
+	     * Maybe the last 8 bytes of a the file (or previously was
+	     * last 8 bytes prior to concatenating SRF files together).
+	     * If so it's the index length and should always be 8 zeros.
+	     */
+if (1 != fread(&ilen, 8, 1, srf->fp)) {
+		srf_destroy(srf, 1);
+		fprintf(stderr, "Error reading srf null index block.\nExiting.\n");
+		exit(1);
+}
+if (ilen != 0) {
+		srf_destroy(srf, 1);
+		fprintf(stderr, "Invalid srf null index block.\nExiting.\n");
+		exit(1);
+}
+break;
+}
+default:
+srf_destroy(srf, 1);
+	    fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type);
+	    exit(1);
+	}
+}
+if( trace_body_count ){
+if( level_mode & LEVEL_NAME )
+printf( " ... %s x%ld\n", name+strlen(srf->th.id_prefix), trace_body_count);
+trace_body_count = 0;
+}
+/* the type should be -1 (EOF) */
+if( type != -1 ) {
+fprintf(stderr, "Block of unknown type '%c'\nExiting.\n", type);
+	exit(1);
+}
+/* are we really at the end of the srf file */
+pos = ftell(srf->fp);
+fseek(srf->fp, 0, SEEK_END);
+if( pos != ftell(srf->fp) ){
+fprintf(stderr, "srf file is corrupt\n");
+	exit(1);
+}
+srf_destroy(srf, 1);
+return 0;
+}
+/* ------------------------------------------------------------------------ */
+/*
+* Main method.
+*/
+int main(int argc, char **argv) {
+int ifile, nfiles;
+char *input = NULL;
+int c;
+int errflg = 0;
+extern char *optarg;
+extern int optind, optopt;
+int level_mode = LEVEL_ALL;
+long read_count[NREADS];
+char *read_str[] = {READ_GOOD_STR, READ_BAD_STR, READ_TOTAL_STR};
+long chunk_count[NCHUNKS];
+uint64_t chunk_size[NCHUNKS];
+uint4 chunk_type[] = {CHUNK_BASE_TYPE, CHUNK_CNF1_TYPE, CHUNK_CNF4_TYPE, CHUNK_SAMP_TYPE, CHUNK_SMP4_TYPE, CHUNK_REGN_TYPE};
+long key_count[NCHUNKS][NKEYS];
+char *keys_str[] = {KEY_TYPE_STR, KEY_VALTYPE_STR, KEY_GROUP_STR, KEY_OFFS_STR, KEY_SCALE_STR, KEY_COORD_STR, KEY_NAME_STR};
+long type_count[NCHUNKS][NTYPES];
+char *types_str[] = {TYPE_PROC_STR, TYPE_SLXI_STR, TYPE_SLXN_STR, TYPE_0FAM_STR, TYPE_1CY3_STR, TYPE_2TXR_STR, TYPE_3CY5_STR};
+int iread, ichunk, ikey, itype;
+while ((c = getopt(argc, argv, "l:")) != -1) {
+switch (c) {
+case 'l':
+if (1 != sscanf(optarg, "%d", &level_mode)) {
+fprintf(stderr,
+"Otion -%c requires an operand\n", optopt);
+errflg++;
+}
+	    break;
+case ':':       /* -? without operand */
+fprintf(stderr,
+"Option -%c requires an operand\n", optopt);
+errflg++;
+break;
+case '?':
+fprintf(stderr,
+"Unrecognised option: -%c\n", optopt);
+errflg++;
+}
+}
+if (errflg) {
+	usage(1);
+}
+nfiles = (argc-optind);
+if( nfiles < 1 ){
+fprintf(stderr, "Please specify input archive name(s).\n");
+usage(1);
+}
+for (ifile=0; ifile<nfiles; ifile++) {
+HashTable *regn_hash;
+char bases[] = "ACGTN";
+uint64_t base_count[5];
+char type[5];
+input = argv[optind+ifile];
+printf("Reading archive %s.\n", input);
+for (iread=0; iread<NREADS; iread++)
+	    read_count[iread] = 0;
+for (ichunk=0; ichunk<NCHUNKS; ichunk++) {
+	    chunk_count[ichunk] = 0;
+	    chunk_size[ichunk] = 0;
+	}
+for (ichunk=0; ichunk<NCHUNKS; ichunk++)
+for (ikey=0; ikey<NKEYS; ikey++)
+key_count[ichunk][ikey] = 0;
+for (ichunk=0; ichunk<NCHUNKS; ichunk++)
+for (itype=0; itype<NTYPES; itype++)
+type_count[ichunk][itype] = 0;
+if (NULL == (regn_hash = HashTableCreate(0, HASH_DYNAMIC_SIZE|HASH_FUNC_JENKINS3))) {
+	    return 1;
+}
+memset(base_count, 0, 5 * sizeof(uint64_t));
+if( 0 == srf_info(input, level_mode, read_count,
+			  chunk_count, chunk_size,
+			  key_count, type_count, regn_hash, base_count) ){
+/* read counts */
+if( level_mode & LEVEL_READ ) {
+for (iread=0; iread<NREADS; iread++) {
+if( read_count[iread] )
+			printf("Reads: %s : %ld\n", read_str[iread], read_count[iread]);
+}
+}
+/* chunk, key and type counts */
+if( level_mode & LEVEL_CHUNK ) {
+for (ichunk=0; ichunk<NCHUNKS; ichunk++) {
+if( chunk_count[ichunk] ) {
+printf("Chunk: %s : %ld %"PRId64"\n",
+			       ZTR_BE2STR(chunk_type[ichunk], type),
+			       chunk_count[ichunk], chunk_size[ichunk]);
+for (ikey=0; ikey<NKEYS; ikey++) {
+if(key_count[ichunk][ikey]) {
+printf("  Mdata key: %s : %ld\n", keys_str[ikey], key_count[ichunk][ikey]);
+if (ikey == KEY_TYPE && (ichunk == CHUNK_SAMP || ichunk == CHUNK_SMP4)) {
+for (itype=0; itype<NTYPES; itype++)
+if(type_count[ichunk][itype])
+printf("    types: %s : %ld\n", types_str[itype], type_count[ichunk][itype]);
+}
+if (ikey == KEY_NAME && (ichunk == CHUNK_REGN)) {
+int ibucket;
+for (ibucket=0; ibucket<regn_hash->nbuckets; ibucket++) {
+HashItem *hi;
+for (hi = regn_hash->bucket[ibucket]; hi; hi = hi->next) {
+regn_t *regn = (regn_t *)hi->data.p;
+printf("    %s x%d\n", hi->key, regn->count);
+}
+}
+}
+}
+}
+}
+}
+}
+/* base counts */
+if( level_mode & LEVEL_BASE ) {
+uint64_t total = 0;
+int i;
+for (i=0; i<5; i++) {
+if( base_count[i] ){
+printf("Bases: %c: %"PRId64"\n",
+			       bases[i], base_count[i]);
+total += base_count[i];
+}
+}
+printf("Bases: TOTAL: %"PRId64"\n", total);
+}
+}
+}
+return 0;
+}

Mercurial > repos > dawe > srf2fastq

comparison srf2fastq/io_lib-1.12.2/progs/srf_info.c @ 0:d901c9f41a6a default tip