comparison srf2fastq/io_lib-1.12.2/progs/extract_qual.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d901c9f41a6a
1 /*
2 * Copyright (c) Medical Research Council 1994-1999. All rights reserved.
3 *
4 * Permission to use, copy, modify and distribute this software and its
5 * documentation for any purpose is hereby granted without fee, provided that
6 * this copyright and notice appears in all copies.
7 *
8 * This file was written by James Bonfield, Simon Dear, Rodger Staden,
9 * as part of the Staden Package at the MRC Laboratory of Molecular
10 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
11 *
12 * MRC disclaims all warranties with regard to this software.
13 */
14
15 #include <stdio.h>
16 #include <errno.h>
17 #include <string.h>
18 #include <strings.h>
19 #include <stdlib.h>
20 #include <unistd.h>
21 #include <io_lib/Read.h>
22 #include <io_lib/traceType.h>
23 #include <io_lib/expFileIO.h>
24 #include <io_lib/open_trace_file.h>
25 #include <io_lib/xalloc.h>
26
27 /* #include "stdio_hack.h" */
28
29 #define LINE_LENGTH 60
30
31 /*
32 * Converts the confidence array to the accuracy value string (AV).
33 *
34 * Note no memory overrun checks are performed on buf. It is recommended
35 * that it is allocated to 4*len (worst case of "100 " for each base).
36 *
37 * Returns the buf argument.
38 */
39 static char *my_conf2str(int1 *conf, int len, char *buf) {
40 int i;
41 char *ret = buf, *rs = buf;
42
43 for (i = 0; i < len; i++) {
44 sprintf(buf, "%d ", conf[i]);
45 buf += strlen(buf);
46
47 if (buf - rs > LINE_LENGTH) {
48 *buf++ = '\n';
49 *buf = '\0';
50 rs = buf;
51 }
52 }
53
54 return ret;
55 }
56
57 static int do_trans(mFILE *infp, char *in_file, FILE *outfp, int format,
58 int good_only, int clip_cosmid, int fasta_out) {
59 Read *r;
60 char *tmp_prob_A, *tmp_prob_C, *tmp_prob_G, *tmp_prob_T;
61 char *cstr = NULL;
62
63 read_sections(READ_BASES);
64 if (NULL == (r = mfread_reading(infp, in_file, format))) {
65 fprintf(stderr, "Failed to read file '%s'\n", in_file);
66 return 1;
67 }
68
69 tmp_prob_A = r->prob_A;
70 tmp_prob_C = r->prob_C;
71 tmp_prob_G = r->prob_G;
72 tmp_prob_T = r->prob_T;
73
74 #ifdef IOLIB_EXP
75 if (good_only && r->orig_trace_format == TT_EXP) {
76 int left=0, right=r->NBases + 1, val, lval, rval;
77 Exp_info *e = (Exp_info *)r->orig_trace;
78
79 if (0 == exp_get_int(e, EFLT_SL, &val))
80 if (val > left)
81 left = val;
82 if (0 == exp_get_int(e, EFLT_QL, &val))
83 if (val > left)
84 left = val;
85
86 if (0 == exp_get_int(e, EFLT_SR, &val))
87 if (val < right)
88 right = val;
89 if (0 == exp_get_int(e, EFLT_QR, &val))
90 if (val < right)
91 right = val;
92
93 /* This is horrid - see gap seqInfo.c file for explaination */
94 if (clip_cosmid) {
95 int got_cosmid;
96
97 if (0 == exp_get_rng(e, EFLT_CS, &lval, &rval)) {
98 got_cosmid = 1;
99 } else if (0 == exp_get_int(e, EFLT_CL, &lval) &&
100 0 == exp_get_int(e, EFLT_CR, &rval)) {
101 got_cosmid = 1;
102 } else {
103 got_cosmid = 0;
104 }
105
106 if (got_cosmid) {
107 if (lval <= left && rval <= left) ;
108 else if (lval <= left+1 && rval < right) left = rval;
109 else if (lval <= left+1 && rval >= right) right = left+1;
110 else if (lval < right && rval < right) right = lval;
111 else if (lval < right && rval >= right) right = lval;
112 }
113 }
114
115 r->prob_A += left;
116 r->prob_C += left;
117 r->prob_G += left;
118 r->prob_T += left;
119 r->NBases = right - left - 1;
120 } else
121 #endif /* IOLIB_EXP */
122 if (good_only) {
123 r->prob_A += r->leftCutoff;
124 r->prob_C += r->leftCutoff;
125 r->prob_G += r->leftCutoff;
126 r->prob_T += r->leftCutoff;
127 r->NBases = r->rightCutoff - r->leftCutoff - 1;
128 }
129
130 /* Confidence values */
131 if (r->prob_A && r->prob_C && r->prob_G && r->prob_T &&
132 r->NBases > 0) {
133 int i;
134
135 /* We have some, but are they non zero values? */
136 for (i = 0; i < r->NBases; i++) {
137 if (r->prob_A[i] || r->prob_C[i] ||
138 r->prob_G[i] || r->prob_T[i])
139 break;
140 }
141 if (i != r->NBases) {
142 int1 *conf = (int1 *)xmalloc(r->NBases);
143 cstr = (char *)xmalloc(r->NBases * 4 + 2);
144
145 for (i = 0; i < r->NBases; i++) {
146 switch (r->base[i]) {
147 case 'a':
148 case 'A':
149 conf[i] = r->prob_A[i];
150 break;
151 case 'c':
152 case 'C':
153 conf[i] = r->prob_C[i];
154 break;
155 case 'g':
156 case 'G':
157 conf[i] = r->prob_G[i];
158 break;
159 case 't':
160 case 'T':
161 conf[i] = r->prob_T[i];
162 break;
163 case 'b':
164 case 'B':
165 conf[i] = (r->prob_C[i] + r->prob_G[i] + r->prob_T[i]) / 3;
166 break;
167 case 'd':
168 case 'D':
169 conf[i] = (r->prob_A[i] + r->prob_G[i] + r->prob_T[i]) / 3;
170 break;
171 case 'h':
172 case 'H':
173 conf[i] = (r->prob_A[i] + r->prob_C[i] + r->prob_T[i]) / 3;
174 break;
175 case 'v':
176 case 'V':
177 conf[i] = (r->prob_A[i] + r->prob_C[i] + r->prob_G[i]) / 3;
178 break;
179 case 'k':
180 case 'K':
181 conf[i] = (r->prob_G[i] + r->prob_T[i]) / 2;
182 break;
183 case 'm':
184 case 'M':
185 conf[i] = (r->prob_A[i] + r->prob_C[i]) / 2;
186 break;
187 case 'r':
188 case 'R':
189 conf[i] = (r->prob_A[i] + r->prob_G[i]) / 2;
190 break;
191 case 's':
192 case 'S':
193 conf[i] = (r->prob_C[i] + r->prob_G[i]) / 2;
194 break;
195 case 'w':
196 case 'W':
197 conf[i] = (r->prob_A[i] + r->prob_T[i]) / 2;
198 break;
199 case 'y':
200 case 'Y':
201 conf[i] = (r->prob_C[i] + r->prob_T[i]) / 2;
202 break;
203 default:
204 conf[i] = (r->prob_A[i] + r->prob_C[i] + r->prob_G[i] + r->prob_T[i]) / 4;
205 }
206 }
207
208 my_conf2str(conf, r->NBases, cstr);
209 xfree(conf);
210 }
211 }
212
213 if (fasta_out) {
214 char *p = strrchr(in_file, '/');
215 /* Add header */
216 if (NULL == p)
217 p = in_file;
218 else
219 p++;
220 fprintf(outfp, ">%s\n", p);
221 }
222
223 if (cstr) {
224 fprintf(outfp,"%s\n", cstr);
225 xfree(cstr);
226 }
227
228 r->prob_A = tmp_prob_A;
229 r->prob_C = tmp_prob_C;
230 r->prob_G = tmp_prob_G;
231 r->prob_T = tmp_prob_T;
232 read_deallocate(r);
233 fflush(outfp);
234
235 return 0;
236 }
237
238 static void usage(void) {
239 fprintf(stderr, "Usage: extract_qual [-r] [-(abi|alf|scf|exp|pln|ctf|ztr)]\n"
240 " [-good_only] [-clip_cosmid] [-fasta_out]\n"
241 " [-output output_name] [input_name] ...\n");
242 exit(1);
243 }
244
245 int main(int argc, char **argv) {
246 int from_stdin = 1;
247 mFILE *infp = mstdin();
248 FILE *outfp = stdout;
249 int format = TT_ANY;
250 int redirect = 1;
251 int good_only = 0;
252 int clip_cosmid = 0;
253 int fasta_out = 0;
254 int ret = 0;
255 char *fofn = NULL;
256
257 for (argc--, argv++; argc > 0; argc--, argv++) {
258 if (strcmp(*argv, "-r") == 0) {
259 redirect = 0;
260 } else if (strcasecmp(*argv, "-abi") == 0) {
261 format = TT_ABI;
262 } else if (strcasecmp(*argv, "-alf") == 0) {
263 format = TT_ALF;
264 } else if (strcasecmp(*argv, "-scf") == 0) {
265 format = TT_SCF;
266 } else if (strcasecmp(*argv, "-exp") == 0) {
267 format = TT_EXP;
268 } else if (strcasecmp(*argv, "-pln") == 0) {
269 format = TT_PLN;
270 } else if (strcasecmp(*argv, "-ztr") == 0) {
271 format = TT_ZTR;
272 } else if (strcasecmp(*argv, "-ctf") == 0) {
273 format = TT_CTF;
274 } else if (strcasecmp(*argv, "-good_only") == 0) {
275 good_only = 1;
276 } else if (strcasecmp(*argv, "-clip_cosmid") == 0) {
277 clip_cosmid = 1;
278 } else if (strcasecmp(*argv, "-fasta_out") == 0) {
279 fasta_out = 1;
280 } else if (strcmp(*argv, "-fofn") == 0) {
281 fofn = *++argv;
282 argc--;
283 from_stdin = 0;
284 } else if (strcasecmp(*argv, "-output") == 0) {
285 if (NULL == (outfp = fopen(*++argv, "wb"))) {
286 perror(*argv);
287 return 1;
288 }
289 argc--;
290 } else if (**argv != '-') {
291 from_stdin = 0;
292 break;
293 } else {
294 usage();
295 }
296 }
297
298 read_experiment_redirect(redirect);
299
300 if (!from_stdin) {
301 if (fofn) {
302 FILE *fofn_fp;
303 char line[8192];
304
305 if (strcmp(fofn, "stdin") == 0)
306 fofn_fp = stdin;
307 else
308 fofn_fp = fopen(fofn, "r");
309
310 if (fofn_fp) {
311 while (fgets(line, 8192, fofn_fp) != NULL) {
312 char *cp;
313 if (cp = strchr(line, '\n'))
314 *cp = 0;
315 if (format == TT_EXP) {
316 infp = open_exp_mfile(line, NULL);
317 } else {
318 infp = open_trace_mfile(line, NULL);
319 }
320 if (NULL == infp) {
321 perror(line);
322 ret = 1;
323 } else {
324 ret |= do_trans(infp, line, outfp, format, good_only,
325 clip_cosmid, fasta_out);
326 mfclose(infp);
327 }
328 }
329 fclose(fofn_fp);
330 }
331 }
332 for (;argc > 0; argc--, argv++) {
333 if (format == TT_EXP) {
334 infp = open_exp_mfile(*argv, NULL);
335 } else {
336 infp = open_trace_mfile(*argv, NULL);
337 }
338 if (NULL == infp) {
339 perror(*argv);
340 ret = 1;
341 } else {
342 ret |= do_trans(infp, *argv, outfp, format, good_only,
343 clip_cosmid, fasta_out);
344 mfclose(infp);
345 }
346 }
347 } else {
348 ret = do_trans(infp, "<stdin>", outfp, format, good_only, clip_cosmid,
349 fasta_out);
350 }
351
352 return ret;
353 }
354