comparison srf2fastq/io_lib-1.12.2/progs/srf_extract_hash.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d901c9f41a6a
1 #ifdef HAVE_CONFIG_H
2 #include "io_lib_config.h"
3 #endif
4
5 #include <stdio.h>
6 #include <math.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <fcntl.h>
10 #include <io_lib/hash_table.h>
11 #include <io_lib/srf.h>
12
13 static char qlookup[256];
14 void init_qlookup(void) {
15 int i;
16 for (i = -128; i < 128; i++) {
17 qlookup[i+128] = '!' + (int)((10*log(1+pow(10, i/10.0))/log(10)+.499));
18 }
19 }
20
21 /* ------------------------------------------------------------------------ */
22
23 #define MAX_READ_LEN 10000
24 void ztr2fastq(ztr_t *z, char *name, int calibrated) {
25 int i, nc, seq_len;
26 char buf[MAX_READ_LEN*2 + 512 + 6];
27 char *seq, *qual, *sdata, *qdata;
28 ztr_chunk_t **chunks;
29
30 /* Extract the sequence only */
31 chunks = ztr_find_chunks(z, ZTR_TYPE_BASE, &nc);
32 if (nc != 1) {
33 fprintf(stderr, "Zero or greater than one BASE chunks found.\n");
34 return;
35 }
36 uncompress_chunk(z, chunks[0]);
37 sdata = chunks[0]->data+1;
38 seq_len = chunks[0]->dlength-1;
39
40 /* Extract the quality */
41 if (calibrated)
42 chunks = ztr_find_chunks(z, ZTR_TYPE_CNF1, &nc);
43 else
44 chunks = ztr_find_chunks(z, ZTR_TYPE_CNF4, &nc);
45
46 if (nc != 1) {
47 fprintf(stderr, "Zero or greater than one CNF chunks found.\n");
48 return;
49 }
50 uncompress_chunk(z, chunks[0]);
51 qdata = chunks[0]->data+1;
52
53 /* Construct fastq entry */
54 seq = buf;
55 *seq++ = '@';
56 while (*name)
57 *seq++ = *name++;
58 *seq++ = '\n';
59
60 qual = seq + seq_len;
61 *qual++ = '\n';
62 *qual++ = '+';
63 *qual++ = '\n';
64
65 for (i = 0; i < seq_len; i++) {
66 if (*sdata != '.') {
67 *seq++ = *sdata++;
68 } else {
69 *seq++ = 'N';
70 sdata++;
71 }
72 *qual++ = calibrated
73 ? *qdata++ + '!'
74 : qlookup[*qdata++ + 128];
75 }
76 *qual++ = '\n';
77
78 fwrite(buf, 1, qual - buf, stdout);
79 }
80
81 /* ------------------------------------------------------------------------ */
82 void usage(void) {
83 fprintf(stderr, "Usage: srf_extract [-fastq] [-c] archive_name trace_name ...\n");
84 exit(1);
85 }
86
87 int main(int argc, char **argv) {
88 FILE *fp;
89 srf_t *srf;
90 char *archive, *trace;
91 uint64_t cpos, hpos, dpos;
92 int fastq = 0, calibrated = 0, i;
93
94 /* Parse args */
95 for (i = 1; i < argc && argv[i][0] == '-'; i++) {
96 if (!strcmp(argv[i], "-")) {
97 break;
98 } else if (!strcmp(argv[i], "-c")) {
99 calibrated = 1;
100 } else if (!strcmp(argv[i], "-fastq")) {
101 fastq = 1;
102 } else {
103 usage();
104 }
105 }
106
107 if (argc < (i+2)) {
108 usage();
109 }
110
111 /* the SRF archive */
112 archive = argv[i++];
113 fp = fopen(archive, "rb");
114 if (NULL == fp) {
115 perror(archive);
116 return 1;
117 }
118 srf = srf_create(fp);
119
120 /* the trace */
121 trace = argv[i];
122
123 if( fastq ){
124 read_sections(READ_BASES);
125 init_qlookup();
126 }else{
127 #ifdef _WIN32
128 _setmode(_fileno(stdout), _O_BINARY);
129 #endif
130 }
131
132 /* Search index */
133 switch (srf_find_trace(srf, trace, &cpos, &hpos, &dpos)) {
134 case -1:
135 fprintf(stderr, "Malformed or missing index hash. "
136 "Consider running srf_index_hash\n");
137 return 1;
138
139 case -2:
140 fprintf(stderr, "%s: not found\n", trace);
141 break;
142
143 default:
144 /* The srf object holds the latest data and trace header blocks */
145 if( fastq ){
146 mFILE *mf = mfcreate(NULL, 0);
147 mfwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, mf);
148 mfwrite(srf->tb.trace, 1, srf->tb.trace_size, mf);
149 mfseek(mf, 0, SEEK_SET);
150 ztr_t *ztr = partial_decode_ztr(srf, mf, NULL);
151 ztr2fastq(ztr, trace, calibrated);
152 delete_ztr(ztr);
153 mfdestroy(mf);
154 } else {
155 fwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, stdout);
156 fwrite(srf->tb.trace, 1, srf->tb.trace_size, stdout);
157 }
158 break;
159 }
160
161 return 0;
162 }