Mercurial > repos > dawe > srf2fastq
comparison srf2fastq/io_lib-1.12.2/progs/extract_seq.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d901c9f41a6a |
---|---|
1 /* | |
2 * Copyright (c) Medical Research Council 1994-1999. All rights reserved. | |
3 * | |
4 * Permission to use, copy, modify and distribute this software and its | |
5 * documentation for any purpose is hereby granted without fee, provided that | |
6 * this copyright and notice appears in all copies. | |
7 * | |
8 * This file was written by James Bonfield, Simon Dear, Rodger Staden, | |
9 * as part of the Staden Package at the MRC Laboratory of Molecular | |
10 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom. | |
11 * | |
12 * MRC disclaims all warranties with regard to this software. | |
13 */ | |
14 | |
15 #include <stdio.h> | |
16 #include <errno.h> | |
17 #include <string.h> | |
18 #include <strings.h> | |
19 #include <stdlib.h> | |
20 #include <unistd.h> | |
21 #include <io_lib/Read.h> | |
22 #include <io_lib/traceType.h> | |
23 #include <io_lib/expFileIO.h> | |
24 #include <io_lib/open_trace_file.h> | |
25 | |
26 /* #include "stdio_hack.h" */ | |
27 | |
28 #define LINE_LENGTH 60 | |
29 | |
30 static int do_trans(mFILE *infp, char *in_file, FILE *outfp, int format, | |
31 int good_only, int clip_cosmid, int fasta_out) { | |
32 Read *r; | |
33 char *tmp_base; | |
34 | |
35 read_sections(READ_BASES); | |
36 if (NULL == (r = mfread_reading(infp, in_file, format))) { | |
37 fprintf(stderr, "Failed to read file '%s'\n", in_file); | |
38 return 1; | |
39 } | |
40 | |
41 tmp_base = r->base; | |
42 | |
43 #ifdef IOLIB_EXP | |
44 if (good_only && r->orig_trace_format == TT_EXP) { | |
45 int left=0, right=r->NBases + 1, val, lval, rval; | |
46 Exp_info *e = (Exp_info *)r->orig_trace; | |
47 | |
48 if (0 == exp_get_int(e, EFLT_SL, &val)) | |
49 if (val > left) | |
50 left = val; | |
51 if (0 == exp_get_int(e, EFLT_QL, &val)) | |
52 if (val > left) | |
53 left = val; | |
54 | |
55 if (0 == exp_get_int(e, EFLT_SR, &val)) | |
56 if (val < right) | |
57 right = val; | |
58 if (0 == exp_get_int(e, EFLT_QR, &val)) | |
59 if (val < right) | |
60 right = val; | |
61 | |
62 /* This is horrid - see gap seqInfo.c file for explaination */ | |
63 if (clip_cosmid) { | |
64 int got_cosmid; | |
65 | |
66 if (0 == exp_get_rng(e, EFLT_CS, &lval, &rval)) { | |
67 got_cosmid = 1; | |
68 } else if (0 == exp_get_int(e, EFLT_CL, &lval) && | |
69 0 == exp_get_int(e, EFLT_CR, &rval)) { | |
70 got_cosmid = 1; | |
71 } else { | |
72 got_cosmid = 0; | |
73 } | |
74 | |
75 if (got_cosmid) { | |
76 if (lval <= left && rval <= left) ; | |
77 else if (lval <= left+1 && rval < right) left = rval; | |
78 else if (lval <= left+1 && rval >= right) right = left+1; | |
79 else if (lval < right && rval < right) right = lval; | |
80 else if (lval < right && rval >= right) right = lval; | |
81 } | |
82 } | |
83 | |
84 r->base += left; | |
85 r->NBases = right - left - 1; | |
86 } else | |
87 #endif /* IOLIB_EXP */ | |
88 if (good_only) { | |
89 r->base += r->leftCutoff; | |
90 r->NBases = r->rightCutoff - r->leftCutoff - 1; | |
91 } | |
92 | |
93 if (fasta_out) { | |
94 char *p = strrchr(in_file, '/'); | |
95 int i; | |
96 | |
97 /* Add header */ | |
98 if (NULL == p) | |
99 p = in_file; | |
100 else | |
101 p++; | |
102 fprintf(outfp, ">%s\n", p); | |
103 | |
104 /* Replace - with N */ | |
105 for (i = 0; i < r->NBases; i++) { | |
106 if (r->base[i] == '-') | |
107 r->base[i] = 'N'; | |
108 } | |
109 } | |
110 set_compression_method(0); /* We don't want to gzip the output */ | |
111 fwrite_reading(outfp, r, TT_PLN); | |
112 | |
113 r->base = tmp_base; | |
114 read_deallocate(r); | |
115 fflush(outfp); | |
116 | |
117 return 0; | |
118 } | |
119 | |
120 static void usage(void) { | |
121 fprintf(stderr, "Usage: extract_seq [-r] [-(abi|alf|scf|exp|pln|ctf|ztr)]\n" | |
122 " [-good_only] [-clip_cosmid] [-fasta_out]\n" | |
123 " [-output output_name] [input_name] ...\n"); | |
124 exit(1); | |
125 } | |
126 | |
127 int main(int argc, char **argv) { | |
128 int from_stdin = 1; | |
129 mFILE *infp = mstdin(); | |
130 FILE *outfp = stdout; | |
131 int format = TT_ANY; | |
132 int redirect = 1; | |
133 int good_only = 0; | |
134 int clip_cosmid = 0; | |
135 int fasta_out = 0; | |
136 int ret = 0; | |
137 char *fofn = NULL; | |
138 | |
139 for (argc--, argv++; argc > 0; argc--, argv++) { | |
140 if (strcmp(*argv, "-r") == 0) { | |
141 redirect = 0; | |
142 } else if (strcasecmp(*argv, "-abi") == 0) { | |
143 format = TT_ABI; | |
144 } else if (strcasecmp(*argv, "-alf") == 0) { | |
145 format = TT_ALF; | |
146 } else if (strcasecmp(*argv, "-scf") == 0) { | |
147 format = TT_SCF; | |
148 } else if (strcasecmp(*argv, "-exp") == 0) { | |
149 format = TT_EXP; | |
150 } else if (strcasecmp(*argv, "-pln") == 0) { | |
151 format = TT_PLN; | |
152 } else if (strcasecmp(*argv, "-ztr") == 0) { | |
153 format = TT_ZTR; | |
154 } else if (strcasecmp(*argv, "-ctf") == 0) { | |
155 format = TT_CTF; | |
156 } else if (strcasecmp(*argv, "-good_only") == 0) { | |
157 good_only = 1; | |
158 } else if (strcasecmp(*argv, "-clip_cosmid") == 0) { | |
159 clip_cosmid = 1; | |
160 } else if (strcasecmp(*argv, "-fasta_out") == 0) { | |
161 fasta_out = 1; | |
162 } else if (strcmp(*argv, "-fofn") == 0) { | |
163 fofn = *++argv; | |
164 argc--; | |
165 from_stdin = 0; | |
166 } else if (strcasecmp(*argv, "-output") == 0) { | |
167 if (NULL == (outfp = fopen(*++argv, "wb"))) { | |
168 perror(*argv); | |
169 return 1; | |
170 } | |
171 argc--; | |
172 } else if (**argv != '-') { | |
173 from_stdin = 0; | |
174 break; | |
175 } else { | |
176 usage(); | |
177 } | |
178 } | |
179 | |
180 read_experiment_redirect(redirect); | |
181 | |
182 if (!from_stdin) { | |
183 if (fofn) { | |
184 FILE *fofn_fp; | |
185 char line[8192]; | |
186 | |
187 if (strcmp(fofn, "stdin") == 0) | |
188 fofn_fp = stdin; | |
189 else | |
190 fofn_fp = fopen(fofn, "r"); | |
191 | |
192 if (fofn_fp) { | |
193 while (fgets(line, 8192, fofn_fp) != NULL) { | |
194 char *cp; | |
195 if (cp = strchr(line, '\n')) | |
196 *cp = 0; | |
197 if (format == TT_EXP) { | |
198 infp = open_exp_mfile(line, NULL); | |
199 } else { | |
200 infp = open_trace_mfile(line, NULL); | |
201 } | |
202 if (NULL == infp) { | |
203 perror(line); | |
204 ret = 1; | |
205 } else { | |
206 ret |= do_trans(infp, line, outfp, format, good_only, | |
207 clip_cosmid, fasta_out); | |
208 mfclose(infp); | |
209 } | |
210 } | |
211 fclose(fofn_fp); | |
212 } | |
213 } | |
214 for (;argc > 0; argc--, argv++) { | |
215 if (format == TT_EXP) { | |
216 infp = open_exp_mfile(*argv, NULL); | |
217 } else { | |
218 infp = open_trace_mfile(*argv, NULL); | |
219 } | |
220 if (NULL == infp) { | |
221 perror(*argv); | |
222 ret = 1; | |
223 } else { | |
224 ret |= do_trans(infp, *argv, outfp, format, good_only, | |
225 clip_cosmid, fasta_out); | |
226 mfclose(infp); | |
227 } | |
228 } | |
229 } else { | |
230 ret = do_trans(infp, "<stdin>", outfp, format, good_only, clip_cosmid, | |
231 fasta_out); | |
232 } | |
233 | |
234 return ret; | |
235 } | |
236 |