view srf2fastq/io_lib-1.12.2/progs/srf_extract_hash.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
line wrap: on
line source

#ifdef HAVE_CONFIG_H
#include "io_lib_config.h"
#endif

#include <stdio.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <fcntl.h>
#include <io_lib/hash_table.h>
#include <io_lib/srf.h>

static char qlookup[256];
void init_qlookup(void) {
    int i;
    for (i = -128; i < 128; i++) {
        qlookup[i+128] = '!' + (int)((10*log(1+pow(10, i/10.0))/log(10)+.499));
    }
}

/* ------------------------------------------------------------------------ */

#define MAX_READ_LEN 10000
void ztr2fastq(ztr_t *z, char *name, int calibrated) {
    int i, nc, seq_len;
    char buf[MAX_READ_LEN*2 + 512 + 6];
    char *seq, *qual, *sdata, *qdata;
    ztr_chunk_t **chunks;

    /* Extract the sequence only */
    chunks = ztr_find_chunks(z, ZTR_TYPE_BASE, &nc);
    if (nc != 1) {
	fprintf(stderr, "Zero or greater than one BASE chunks found.\n");
	return;
    }
    uncompress_chunk(z, chunks[0]);
    sdata = chunks[0]->data+1;
    seq_len = chunks[0]->dlength-1;

    /* Extract the quality */
    if (calibrated)
	chunks = ztr_find_chunks(z, ZTR_TYPE_CNF1, &nc);
    else
	chunks = ztr_find_chunks(z, ZTR_TYPE_CNF4, &nc);

    if (nc != 1) {
	fprintf(stderr, "Zero or greater than one CNF chunks found.\n");
	return;
    }
    uncompress_chunk(z, chunks[0]);
    qdata = chunks[0]->data+1;

    /* Construct fastq entry */
    seq = buf;
    *seq++ = '@';
    while (*name)
	*seq++ = *name++;
    *seq++ = '\n';

    qual = seq + seq_len;
    *qual++ = '\n';
    *qual++ = '+';
    *qual++ = '\n';

    for (i = 0; i < seq_len; i++) {
	if (*sdata != '.') {
	    *seq++ = *sdata++;
	} else {
	    *seq++ = 'N';
	    sdata++;
	}
	*qual++ = calibrated
	    ? *qdata++ + '!'
	    : qlookup[*qdata++ + 128];
    }
    *qual++ = '\n';

    fwrite(buf, 1, qual - buf, stdout);
}

/* ------------------------------------------------------------------------ */
void usage(void) {
    fprintf(stderr, "Usage: srf_extract [-fastq] [-c] archive_name trace_name ...\n");
    exit(1);
}

int main(int argc, char **argv) {
    FILE *fp;
    srf_t *srf;
    char *archive, *trace;
    uint64_t cpos, hpos, dpos;
    int fastq = 0, calibrated = 0, i;

    /* Parse args */
    for (i = 1; i < argc && argv[i][0] == '-'; i++) {
	if (!strcmp(argv[i], "-")) {
	    break;
	} else if (!strcmp(argv[i], "-c")) {
	    calibrated = 1;
	} else if (!strcmp(argv[i], "-fastq")) {
	    fastq = 1;
	} else {
	    usage();
	}
    }    

    if (argc < (i+2)) {
      usage();
    }

    /* the SRF archive */
    archive = argv[i++];
    fp = fopen(archive, "rb");
    if (NULL == fp) {
        perror(archive);
        return 1;
    }
    srf = srf_create(fp);

    /* the trace */
    trace = argv[i];

    if( fastq ){
        read_sections(READ_BASES);
        init_qlookup();
    }else{
#ifdef _WIN32
        _setmode(_fileno(stdout), _O_BINARY);
#endif
    }

    /* Search index */
    switch (srf_find_trace(srf, trace, &cpos, &hpos, &dpos)) {
    case -1:
        fprintf(stderr, "Malformed or missing index hash. "
                "Consider running srf_index_hash\n");
        return 1;

    case -2:
        fprintf(stderr, "%s: not found\n", trace);
        break;

    default:
        /* The srf object holds the latest data and trace header blocks */
        if( fastq ){
            mFILE *mf = mfcreate(NULL, 0);
            mfwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, mf);
            mfwrite(srf->tb.trace,     1, srf->tb.trace_size,     mf);
            mfseek(mf, 0, SEEK_SET);
            ztr_t *ztr = partial_decode_ztr(srf, mf, NULL);
            ztr2fastq(ztr, trace, calibrated);
            delete_ztr(ztr);
            mfdestroy(mf);
        } else {
            fwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, stdout);
            fwrite(srf->tb.trace,     1, srf->tb.trace_size,     stdout);
        }
        break;
    }
	
    return 0;
}