Mercurial > repos > dawe > srf2fastq
comparison srf2fastq/io_lib-1.12.2/progs/srf_extract_hash.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d901c9f41a6a |
---|---|
1 #ifdef HAVE_CONFIG_H | |
2 #include "io_lib_config.h" | |
3 #endif | |
4 | |
5 #include <stdio.h> | |
6 #include <math.h> | |
7 #include <string.h> | |
8 #include <stdlib.h> | |
9 #include <fcntl.h> | |
10 #include <io_lib/hash_table.h> | |
11 #include <io_lib/srf.h> | |
12 | |
13 static char qlookup[256]; | |
14 void init_qlookup(void) { | |
15 int i; | |
16 for (i = -128; i < 128; i++) { | |
17 qlookup[i+128] = '!' + (int)((10*log(1+pow(10, i/10.0))/log(10)+.499)); | |
18 } | |
19 } | |
20 | |
21 /* ------------------------------------------------------------------------ */ | |
22 | |
23 #define MAX_READ_LEN 10000 | |
24 void ztr2fastq(ztr_t *z, char *name, int calibrated) { | |
25 int i, nc, seq_len; | |
26 char buf[MAX_READ_LEN*2 + 512 + 6]; | |
27 char *seq, *qual, *sdata, *qdata; | |
28 ztr_chunk_t **chunks; | |
29 | |
30 /* Extract the sequence only */ | |
31 chunks = ztr_find_chunks(z, ZTR_TYPE_BASE, &nc); | |
32 if (nc != 1) { | |
33 fprintf(stderr, "Zero or greater than one BASE chunks found.\n"); | |
34 return; | |
35 } | |
36 uncompress_chunk(z, chunks[0]); | |
37 sdata = chunks[0]->data+1; | |
38 seq_len = chunks[0]->dlength-1; | |
39 | |
40 /* Extract the quality */ | |
41 if (calibrated) | |
42 chunks = ztr_find_chunks(z, ZTR_TYPE_CNF1, &nc); | |
43 else | |
44 chunks = ztr_find_chunks(z, ZTR_TYPE_CNF4, &nc); | |
45 | |
46 if (nc != 1) { | |
47 fprintf(stderr, "Zero or greater than one CNF chunks found.\n"); | |
48 return; | |
49 } | |
50 uncompress_chunk(z, chunks[0]); | |
51 qdata = chunks[0]->data+1; | |
52 | |
53 /* Construct fastq entry */ | |
54 seq = buf; | |
55 *seq++ = '@'; | |
56 while (*name) | |
57 *seq++ = *name++; | |
58 *seq++ = '\n'; | |
59 | |
60 qual = seq + seq_len; | |
61 *qual++ = '\n'; | |
62 *qual++ = '+'; | |
63 *qual++ = '\n'; | |
64 | |
65 for (i = 0; i < seq_len; i++) { | |
66 if (*sdata != '.') { | |
67 *seq++ = *sdata++; | |
68 } else { | |
69 *seq++ = 'N'; | |
70 sdata++; | |
71 } | |
72 *qual++ = calibrated | |
73 ? *qdata++ + '!' | |
74 : qlookup[*qdata++ + 128]; | |
75 } | |
76 *qual++ = '\n'; | |
77 | |
78 fwrite(buf, 1, qual - buf, stdout); | |
79 } | |
80 | |
81 /* ------------------------------------------------------------------------ */ | |
82 void usage(void) { | |
83 fprintf(stderr, "Usage: srf_extract [-fastq] [-c] archive_name trace_name ...\n"); | |
84 exit(1); | |
85 } | |
86 | |
87 int main(int argc, char **argv) { | |
88 FILE *fp; | |
89 srf_t *srf; | |
90 char *archive, *trace; | |
91 uint64_t cpos, hpos, dpos; | |
92 int fastq = 0, calibrated = 0, i; | |
93 | |
94 /* Parse args */ | |
95 for (i = 1; i < argc && argv[i][0] == '-'; i++) { | |
96 if (!strcmp(argv[i], "-")) { | |
97 break; | |
98 } else if (!strcmp(argv[i], "-c")) { | |
99 calibrated = 1; | |
100 } else if (!strcmp(argv[i], "-fastq")) { | |
101 fastq = 1; | |
102 } else { | |
103 usage(); | |
104 } | |
105 } | |
106 | |
107 if (argc < (i+2)) { | |
108 usage(); | |
109 } | |
110 | |
111 /* the SRF archive */ | |
112 archive = argv[i++]; | |
113 fp = fopen(archive, "rb"); | |
114 if (NULL == fp) { | |
115 perror(archive); | |
116 return 1; | |
117 } | |
118 srf = srf_create(fp); | |
119 | |
120 /* the trace */ | |
121 trace = argv[i]; | |
122 | |
123 if( fastq ){ | |
124 read_sections(READ_BASES); | |
125 init_qlookup(); | |
126 }else{ | |
127 #ifdef _WIN32 | |
128 _setmode(_fileno(stdout), _O_BINARY); | |
129 #endif | |
130 } | |
131 | |
132 /* Search index */ | |
133 switch (srf_find_trace(srf, trace, &cpos, &hpos, &dpos)) { | |
134 case -1: | |
135 fprintf(stderr, "Malformed or missing index hash. " | |
136 "Consider running srf_index_hash\n"); | |
137 return 1; | |
138 | |
139 case -2: | |
140 fprintf(stderr, "%s: not found\n", trace); | |
141 break; | |
142 | |
143 default: | |
144 /* The srf object holds the latest data and trace header blocks */ | |
145 if( fastq ){ | |
146 mFILE *mf = mfcreate(NULL, 0); | |
147 mfwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, mf); | |
148 mfwrite(srf->tb.trace, 1, srf->tb.trace_size, mf); | |
149 mfseek(mf, 0, SEEK_SET); | |
150 ztr_t *ztr = partial_decode_ztr(srf, mf, NULL); | |
151 ztr2fastq(ztr, trace, calibrated); | |
152 delete_ztr(ztr); | |
153 mfdestroy(mf); | |
154 } else { | |
155 fwrite(srf->th.trace_hdr, 1, srf->th.trace_hdr_size, stdout); | |
156 fwrite(srf->tb.trace, 1, srf->tb.trace_size, stdout); | |
157 } | |
158 break; | |
159 } | |
160 | |
161 return 0; | |
162 } |