Mercurial > repos > dawe > srf2fastq
diff srf2fastq/io_lib-1.12.2/progs/srf_extract_hash.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/srf2fastq/io_lib-1.12.2/progs/srf_extract_hash.c Tue Jun 07 17:48:05 2011 -0400 @@ -0,0 +1,162 @@ +#ifdef HAVE_CONFIG_H +#include "io_lib_config.h" +#endif + +#include <stdio.h> +#include <math.h> +#include <string.h> +#include <stdlib.h> +#include <fcntl.h> +#include <io_lib/hash_table.h> +#include <io_lib/srf.h> + +static char qlookup[256]; +void init_qlookup(void) { + int i; + for (i = -128; i < 128; i++) { + qlookup[i+128] = '!' + (int)((10*log(1+pow(10, i/10.0))/log(10)+.499)); + } +} + +/* ------------------------------------------------------------------------ */ + +#define MAX_READ_LEN 10000 +void ztr2fastq(ztr_t *z, char *name, int calibrated) { + int i, nc, seq_len; + char buf[MAX_READ_LEN*2 + 512 + 6]; + char *seq, *qual, *sdata, *qdata; + ztr_chunk_t **chunks; + + /* Extract the sequence only */ + chunks = ztr_find_chunks(z, ZTR_TYPE_BASE, &nc); + if (nc != 1) { + fprintf(stderr, "Zero or greater than one BASE chunks found.\n"); + return; + } + uncompress_chunk(z, chunks[0]); + sdata = chunks[0]->data+1; + seq_len = chunks[0]->dlength-1; + + /* Extract the quality */ + if (calibrated) + chunks = ztr_find_chunks(z, ZTR_TYPE_CNF1, &nc); + else + chunks = ztr_find_chunks(z, ZTR_TYPE_CNF4, &nc); + + if (nc != 1) { + fprintf(stderr, "Zero or greater than one CNF chunks found.\n"); + return; + } + uncompress_chunk(z, chunks[0]); + qdata = chunks[0]->data+1; + + /* Construct fastq entry */ + seq = buf; + *seq++ = '@'; + while (*name) + *seq++ = *name++; + *seq++ = '\n'; + + qual = seq + seq_len; + *qual++ = '\n'; + *qual++ = '+'; + *qual++ = '\n'; + + for (i = 0; i < seq_len; i++) { + if (*sdata != '.') { + *seq++ = *sdata++; + } else { + *seq++ = 'N'; + sdata++; + } + *qual++ = calibrated + ? *qdata++ + '!' + : qlookup[*qdata++ + 128]; + } + *qual++ = '\n'; + + fwrite(buf, 1, qual - buf, stdout); +} + +/* ------------------------------------------------------------------------ */ +void usage(void) { + fprintf(stderr, "Usage: srf_extract [-fastq] [-c] archive_name trace_name ...\n"); + exit(1); +} + +int main(int argc, char **argv) { + FILE *fp; + srf_t *srf; + char *archive, *trace; + uint64_t cpos, hpos, dpos; + int fastq = 0, calibrated = 0, i; + + /* Parse args */ + for (i = 1; i < argc && argv[i][0] == '-'; i++) { + if (!strcmp(argv[i], "-")) { + break; + } else if (!strcmp(argv[i], "-c")) { + calibrated = 1; + } else if (!strcmp(argv[i], "-fastq")) { + fastq = 1; + } else { + usage(); + } + } + + if (argc < (i+2)) { + usage(); + } + + /* the SRF archive */ + archive = argv[i++]; + fp = fopen(archive, "rb"); + if (NULL == fp) { + perror(archive); + return 1; + } + srf = srf_create(fp); + + /* the trace */ + trace = argv[i]; + + if( fastq ){ + read_sections(READ_BASES); + init_qlookup(); + }else{ +#ifdef _WIN32 + _setmode(_fileno(stdout), _O_BINARY); +#endif + } + + /* Search index */ + switch (srf_find_trace(srf, trace, &cpos, &hpos, &dpos)) { + case -1: + fprintf(stderr, "Malformed or missing index hash. " + "Consider running srf_index_hash\n"); + return 1; + + case -2: + fprintf(stderr, "%s: not found\n", trace); + break; + + default: + /* The srf object holds the latest data and trace header blocks */ + if( fastq ){ + mFILE *mf = mfcreate(NULL, 0); + mfwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, mf); + mfwrite(srf->tb.trace, 1, srf->tb.trace_size, mf); + mfseek(mf, 0, SEEK_SET); + ztr_t *ztr = partial_decode_ztr(srf, mf, NULL); + ztr2fastq(ztr, trace, calibrated); + delete_ztr(ztr); + mfdestroy(mf); + } else { + fwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, stdout); + fwrite(srf->tb.trace, 1, srf->tb.trace_size, stdout); + } + break; + } + + return 0; +}