Mercurial > repos > dawe > srf2fastq
comparison srf2fastq/io_lib-1.12.2/progs/extract_qual.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d901c9f41a6a |
---|---|
1 /* | |
2 * Copyright (c) Medical Research Council 1994-1999. All rights reserved. | |
3 * | |
4 * Permission to use, copy, modify and distribute this software and its | |
5 * documentation for any purpose is hereby granted without fee, provided that | |
6 * this copyright and notice appears in all copies. | |
7 * | |
8 * This file was written by James Bonfield, Simon Dear, Rodger Staden, | |
9 * as part of the Staden Package at the MRC Laboratory of Molecular | |
10 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom. | |
11 * | |
12 * MRC disclaims all warranties with regard to this software. | |
13 */ | |
14 | |
15 #include <stdio.h> | |
16 #include <errno.h> | |
17 #include <string.h> | |
18 #include <strings.h> | |
19 #include <stdlib.h> | |
20 #include <unistd.h> | |
21 #include <io_lib/Read.h> | |
22 #include <io_lib/traceType.h> | |
23 #include <io_lib/expFileIO.h> | |
24 #include <io_lib/open_trace_file.h> | |
25 #include <io_lib/xalloc.h> | |
26 | |
27 /* #include "stdio_hack.h" */ | |
28 | |
29 #define LINE_LENGTH 60 | |
30 | |
31 /* | |
32 * Converts the confidence array to the accuracy value string (AV). | |
33 * | |
34 * Note no memory overrun checks are performed on buf. It is recommended | |
35 * that it is allocated to 4*len (worst case of "100 " for each base). | |
36 * | |
37 * Returns the buf argument. | |
38 */ | |
39 static char *my_conf2str(int1 *conf, int len, char *buf) { | |
40 int i; | |
41 char *ret = buf, *rs = buf; | |
42 | |
43 for (i = 0; i < len; i++) { | |
44 sprintf(buf, "%d ", conf[i]); | |
45 buf += strlen(buf); | |
46 | |
47 if (buf - rs > LINE_LENGTH) { | |
48 *buf++ = '\n'; | |
49 *buf = '\0'; | |
50 rs = buf; | |
51 } | |
52 } | |
53 | |
54 return ret; | |
55 } | |
56 | |
57 static int do_trans(mFILE *infp, char *in_file, FILE *outfp, int format, | |
58 int good_only, int clip_cosmid, int fasta_out) { | |
59 Read *r; | |
60 char *tmp_prob_A, *tmp_prob_C, *tmp_prob_G, *tmp_prob_T; | |
61 char *cstr = NULL; | |
62 | |
63 read_sections(READ_BASES); | |
64 if (NULL == (r = mfread_reading(infp, in_file, format))) { | |
65 fprintf(stderr, "Failed to read file '%s'\n", in_file); | |
66 return 1; | |
67 } | |
68 | |
69 tmp_prob_A = r->prob_A; | |
70 tmp_prob_C = r->prob_C; | |
71 tmp_prob_G = r->prob_G; | |
72 tmp_prob_T = r->prob_T; | |
73 | |
74 #ifdef IOLIB_EXP | |
75 if (good_only && r->orig_trace_format == TT_EXP) { | |
76 int left=0, right=r->NBases + 1, val, lval, rval; | |
77 Exp_info *e = (Exp_info *)r->orig_trace; | |
78 | |
79 if (0 == exp_get_int(e, EFLT_SL, &val)) | |
80 if (val > left) | |
81 left = val; | |
82 if (0 == exp_get_int(e, EFLT_QL, &val)) | |
83 if (val > left) | |
84 left = val; | |
85 | |
86 if (0 == exp_get_int(e, EFLT_SR, &val)) | |
87 if (val < right) | |
88 right = val; | |
89 if (0 == exp_get_int(e, EFLT_QR, &val)) | |
90 if (val < right) | |
91 right = val; | |
92 | |
93 /* This is horrid - see gap seqInfo.c file for explaination */ | |
94 if (clip_cosmid) { | |
95 int got_cosmid; | |
96 | |
97 if (0 == exp_get_rng(e, EFLT_CS, &lval, &rval)) { | |
98 got_cosmid = 1; | |
99 } else if (0 == exp_get_int(e, EFLT_CL, &lval) && | |
100 0 == exp_get_int(e, EFLT_CR, &rval)) { | |
101 got_cosmid = 1; | |
102 } else { | |
103 got_cosmid = 0; | |
104 } | |
105 | |
106 if (got_cosmid) { | |
107 if (lval <= left && rval <= left) ; | |
108 else if (lval <= left+1 && rval < right) left = rval; | |
109 else if (lval <= left+1 && rval >= right) right = left+1; | |
110 else if (lval < right && rval < right) right = lval; | |
111 else if (lval < right && rval >= right) right = lval; | |
112 } | |
113 } | |
114 | |
115 r->prob_A += left; | |
116 r->prob_C += left; | |
117 r->prob_G += left; | |
118 r->prob_T += left; | |
119 r->NBases = right - left - 1; | |
120 } else | |
121 #endif /* IOLIB_EXP */ | |
122 if (good_only) { | |
123 r->prob_A += r->leftCutoff; | |
124 r->prob_C += r->leftCutoff; | |
125 r->prob_G += r->leftCutoff; | |
126 r->prob_T += r->leftCutoff; | |
127 r->NBases = r->rightCutoff - r->leftCutoff - 1; | |
128 } | |
129 | |
130 /* Confidence values */ | |
131 if (r->prob_A && r->prob_C && r->prob_G && r->prob_T && | |
132 r->NBases > 0) { | |
133 int i; | |
134 | |
135 /* We have some, but are they non zero values? */ | |
136 for (i = 0; i < r->NBases; i++) { | |
137 if (r->prob_A[i] || r->prob_C[i] || | |
138 r->prob_G[i] || r->prob_T[i]) | |
139 break; | |
140 } | |
141 if (i != r->NBases) { | |
142 int1 *conf = (int1 *)xmalloc(r->NBases); | |
143 cstr = (char *)xmalloc(r->NBases * 4 + 2); | |
144 | |
145 for (i = 0; i < r->NBases; i++) { | |
146 switch (r->base[i]) { | |
147 case 'a': | |
148 case 'A': | |
149 conf[i] = r->prob_A[i]; | |
150 break; | |
151 case 'c': | |
152 case 'C': | |
153 conf[i] = r->prob_C[i]; | |
154 break; | |
155 case 'g': | |
156 case 'G': | |
157 conf[i] = r->prob_G[i]; | |
158 break; | |
159 case 't': | |
160 case 'T': | |
161 conf[i] = r->prob_T[i]; | |
162 break; | |
163 case 'b': | |
164 case 'B': | |
165 conf[i] = (r->prob_C[i] + r->prob_G[i] + r->prob_T[i]) / 3; | |
166 break; | |
167 case 'd': | |
168 case 'D': | |
169 conf[i] = (r->prob_A[i] + r->prob_G[i] + r->prob_T[i]) / 3; | |
170 break; | |
171 case 'h': | |
172 case 'H': | |
173 conf[i] = (r->prob_A[i] + r->prob_C[i] + r->prob_T[i]) / 3; | |
174 break; | |
175 case 'v': | |
176 case 'V': | |
177 conf[i] = (r->prob_A[i] + r->prob_C[i] + r->prob_G[i]) / 3; | |
178 break; | |
179 case 'k': | |
180 case 'K': | |
181 conf[i] = (r->prob_G[i] + r->prob_T[i]) / 2; | |
182 break; | |
183 case 'm': | |
184 case 'M': | |
185 conf[i] = (r->prob_A[i] + r->prob_C[i]) / 2; | |
186 break; | |
187 case 'r': | |
188 case 'R': | |
189 conf[i] = (r->prob_A[i] + r->prob_G[i]) / 2; | |
190 break; | |
191 case 's': | |
192 case 'S': | |
193 conf[i] = (r->prob_C[i] + r->prob_G[i]) / 2; | |
194 break; | |
195 case 'w': | |
196 case 'W': | |
197 conf[i] = (r->prob_A[i] + r->prob_T[i]) / 2; | |
198 break; | |
199 case 'y': | |
200 case 'Y': | |
201 conf[i] = (r->prob_C[i] + r->prob_T[i]) / 2; | |
202 break; | |
203 default: | |
204 conf[i] = (r->prob_A[i] + r->prob_C[i] + r->prob_G[i] + r->prob_T[i]) / 4; | |
205 } | |
206 } | |
207 | |
208 my_conf2str(conf, r->NBases, cstr); | |
209 xfree(conf); | |
210 } | |
211 } | |
212 | |
213 if (fasta_out) { | |
214 char *p = strrchr(in_file, '/'); | |
215 /* Add header */ | |
216 if (NULL == p) | |
217 p = in_file; | |
218 else | |
219 p++; | |
220 fprintf(outfp, ">%s\n", p); | |
221 } | |
222 | |
223 if (cstr) { | |
224 fprintf(outfp,"%s\n", cstr); | |
225 xfree(cstr); | |
226 } | |
227 | |
228 r->prob_A = tmp_prob_A; | |
229 r->prob_C = tmp_prob_C; | |
230 r->prob_G = tmp_prob_G; | |
231 r->prob_T = tmp_prob_T; | |
232 read_deallocate(r); | |
233 fflush(outfp); | |
234 | |
235 return 0; | |
236 } | |
237 | |
238 static void usage(void) { | |
239 fprintf(stderr, "Usage: extract_qual [-r] [-(abi|alf|scf|exp|pln|ctf|ztr)]\n" | |
240 " [-good_only] [-clip_cosmid] [-fasta_out]\n" | |
241 " [-output output_name] [input_name] ...\n"); | |
242 exit(1); | |
243 } | |
244 | |
245 int main(int argc, char **argv) { | |
246 int from_stdin = 1; | |
247 mFILE *infp = mstdin(); | |
248 FILE *outfp = stdout; | |
249 int format = TT_ANY; | |
250 int redirect = 1; | |
251 int good_only = 0; | |
252 int clip_cosmid = 0; | |
253 int fasta_out = 0; | |
254 int ret = 0; | |
255 char *fofn = NULL; | |
256 | |
257 for (argc--, argv++; argc > 0; argc--, argv++) { | |
258 if (strcmp(*argv, "-r") == 0) { | |
259 redirect = 0; | |
260 } else if (strcasecmp(*argv, "-abi") == 0) { | |
261 format = TT_ABI; | |
262 } else if (strcasecmp(*argv, "-alf") == 0) { | |
263 format = TT_ALF; | |
264 } else if (strcasecmp(*argv, "-scf") == 0) { | |
265 format = TT_SCF; | |
266 } else if (strcasecmp(*argv, "-exp") == 0) { | |
267 format = TT_EXP; | |
268 } else if (strcasecmp(*argv, "-pln") == 0) { | |
269 format = TT_PLN; | |
270 } else if (strcasecmp(*argv, "-ztr") == 0) { | |
271 format = TT_ZTR; | |
272 } else if (strcasecmp(*argv, "-ctf") == 0) { | |
273 format = TT_CTF; | |
274 } else if (strcasecmp(*argv, "-good_only") == 0) { | |
275 good_only = 1; | |
276 } else if (strcasecmp(*argv, "-clip_cosmid") == 0) { | |
277 clip_cosmid = 1; | |
278 } else if (strcasecmp(*argv, "-fasta_out") == 0) { | |
279 fasta_out = 1; | |
280 } else if (strcmp(*argv, "-fofn") == 0) { | |
281 fofn = *++argv; | |
282 argc--; | |
283 from_stdin = 0; | |
284 } else if (strcasecmp(*argv, "-output") == 0) { | |
285 if (NULL == (outfp = fopen(*++argv, "wb"))) { | |
286 perror(*argv); | |
287 return 1; | |
288 } | |
289 argc--; | |
290 } else if (**argv != '-') { | |
291 from_stdin = 0; | |
292 break; | |
293 } else { | |
294 usage(); | |
295 } | |
296 } | |
297 | |
298 read_experiment_redirect(redirect); | |
299 | |
300 if (!from_stdin) { | |
301 if (fofn) { | |
302 FILE *fofn_fp; | |
303 char line[8192]; | |
304 | |
305 if (strcmp(fofn, "stdin") == 0) | |
306 fofn_fp = stdin; | |
307 else | |
308 fofn_fp = fopen(fofn, "r"); | |
309 | |
310 if (fofn_fp) { | |
311 while (fgets(line, 8192, fofn_fp) != NULL) { | |
312 char *cp; | |
313 if (cp = strchr(line, '\n')) | |
314 *cp = 0; | |
315 if (format == TT_EXP) { | |
316 infp = open_exp_mfile(line, NULL); | |
317 } else { | |
318 infp = open_trace_mfile(line, NULL); | |
319 } | |
320 if (NULL == infp) { | |
321 perror(line); | |
322 ret = 1; | |
323 } else { | |
324 ret |= do_trans(infp, line, outfp, format, good_only, | |
325 clip_cosmid, fasta_out); | |
326 mfclose(infp); | |
327 } | |
328 } | |
329 fclose(fofn_fp); | |
330 } | |
331 } | |
332 for (;argc > 0; argc--, argv++) { | |
333 if (format == TT_EXP) { | |
334 infp = open_exp_mfile(*argv, NULL); | |
335 } else { | |
336 infp = open_trace_mfile(*argv, NULL); | |
337 } | |
338 if (NULL == infp) { | |
339 perror(*argv); | |
340 ret = 1; | |
341 } else { | |
342 ret |= do_trans(infp, *argv, outfp, format, good_only, | |
343 clip_cosmid, fasta_out); | |
344 mfclose(infp); | |
345 } | |
346 } | |
347 } else { | |
348 ret = do_trans(infp, "<stdin>", outfp, format, good_only, clip_cosmid, | |
349 fasta_out); | |
350 } | |
351 | |
352 return ret; | |
353 } | |
354 |