comparison srf2fastq/io_lib-1.12.2/progs/extract_seq.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d901c9f41a6a
1 /*
2 * Copyright (c) Medical Research Council 1994-1999. All rights reserved.
3 *
4 * Permission to use, copy, modify and distribute this software and its
5 * documentation for any purpose is hereby granted without fee, provided that
6 * this copyright and notice appears in all copies.
7 *
8 * This file was written by James Bonfield, Simon Dear, Rodger Staden,
9 * as part of the Staden Package at the MRC Laboratory of Molecular
10 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
11 *
12 * MRC disclaims all warranties with regard to this software.
13 */
14
15 #include <stdio.h>
16 #include <errno.h>
17 #include <string.h>
18 #include <strings.h>
19 #include <stdlib.h>
20 #include <unistd.h>
21 #include <io_lib/Read.h>
22 #include <io_lib/traceType.h>
23 #include <io_lib/expFileIO.h>
24 #include <io_lib/open_trace_file.h>
25
26 /* #include "stdio_hack.h" */
27
28 #define LINE_LENGTH 60
29
30 static int do_trans(mFILE *infp, char *in_file, FILE *outfp, int format,
31 int good_only, int clip_cosmid, int fasta_out) {
32 Read *r;
33 char *tmp_base;
34
35 read_sections(READ_BASES);
36 if (NULL == (r = mfread_reading(infp, in_file, format))) {
37 fprintf(stderr, "Failed to read file '%s'\n", in_file);
38 return 1;
39 }
40
41 tmp_base = r->base;
42
43 #ifdef IOLIB_EXP
44 if (good_only && r->orig_trace_format == TT_EXP) {
45 int left=0, right=r->NBases + 1, val, lval, rval;
46 Exp_info *e = (Exp_info *)r->orig_trace;
47
48 if (0 == exp_get_int(e, EFLT_SL, &val))
49 if (val > left)
50 left = val;
51 if (0 == exp_get_int(e, EFLT_QL, &val))
52 if (val > left)
53 left = val;
54
55 if (0 == exp_get_int(e, EFLT_SR, &val))
56 if (val < right)
57 right = val;
58 if (0 == exp_get_int(e, EFLT_QR, &val))
59 if (val < right)
60 right = val;
61
62 /* This is horrid - see gap seqInfo.c file for explaination */
63 if (clip_cosmid) {
64 int got_cosmid;
65
66 if (0 == exp_get_rng(e, EFLT_CS, &lval, &rval)) {
67 got_cosmid = 1;
68 } else if (0 == exp_get_int(e, EFLT_CL, &lval) &&
69 0 == exp_get_int(e, EFLT_CR, &rval)) {
70 got_cosmid = 1;
71 } else {
72 got_cosmid = 0;
73 }
74
75 if (got_cosmid) {
76 if (lval <= left && rval <= left) ;
77 else if (lval <= left+1 && rval < right) left = rval;
78 else if (lval <= left+1 && rval >= right) right = left+1;
79 else if (lval < right && rval < right) right = lval;
80 else if (lval < right && rval >= right) right = lval;
81 }
82 }
83
84 r->base += left;
85 r->NBases = right - left - 1;
86 } else
87 #endif /* IOLIB_EXP */
88 if (good_only) {
89 r->base += r->leftCutoff;
90 r->NBases = r->rightCutoff - r->leftCutoff - 1;
91 }
92
93 if (fasta_out) {
94 char *p = strrchr(in_file, '/');
95 int i;
96
97 /* Add header */
98 if (NULL == p)
99 p = in_file;
100 else
101 p++;
102 fprintf(outfp, ">%s\n", p);
103
104 /* Replace - with N */
105 for (i = 0; i < r->NBases; i++) {
106 if (r->base[i] == '-')
107 r->base[i] = 'N';
108 }
109 }
110 set_compression_method(0); /* We don't want to gzip the output */
111 fwrite_reading(outfp, r, TT_PLN);
112
113 r->base = tmp_base;
114 read_deallocate(r);
115 fflush(outfp);
116
117 return 0;
118 }
119
120 static void usage(void) {
121 fprintf(stderr, "Usage: extract_seq [-r] [-(abi|alf|scf|exp|pln|ctf|ztr)]\n"
122 " [-good_only] [-clip_cosmid] [-fasta_out]\n"
123 " [-output output_name] [input_name] ...\n");
124 exit(1);
125 }
126
127 int main(int argc, char **argv) {
128 int from_stdin = 1;
129 mFILE *infp = mstdin();
130 FILE *outfp = stdout;
131 int format = TT_ANY;
132 int redirect = 1;
133 int good_only = 0;
134 int clip_cosmid = 0;
135 int fasta_out = 0;
136 int ret = 0;
137 char *fofn = NULL;
138
139 for (argc--, argv++; argc > 0; argc--, argv++) {
140 if (strcmp(*argv, "-r") == 0) {
141 redirect = 0;
142 } else if (strcasecmp(*argv, "-abi") == 0) {
143 format = TT_ABI;
144 } else if (strcasecmp(*argv, "-alf") == 0) {
145 format = TT_ALF;
146 } else if (strcasecmp(*argv, "-scf") == 0) {
147 format = TT_SCF;
148 } else if (strcasecmp(*argv, "-exp") == 0) {
149 format = TT_EXP;
150 } else if (strcasecmp(*argv, "-pln") == 0) {
151 format = TT_PLN;
152 } else if (strcasecmp(*argv, "-ztr") == 0) {
153 format = TT_ZTR;
154 } else if (strcasecmp(*argv, "-ctf") == 0) {
155 format = TT_CTF;
156 } else if (strcasecmp(*argv, "-good_only") == 0) {
157 good_only = 1;
158 } else if (strcasecmp(*argv, "-clip_cosmid") == 0) {
159 clip_cosmid = 1;
160 } else if (strcasecmp(*argv, "-fasta_out") == 0) {
161 fasta_out = 1;
162 } else if (strcmp(*argv, "-fofn") == 0) {
163 fofn = *++argv;
164 argc--;
165 from_stdin = 0;
166 } else if (strcasecmp(*argv, "-output") == 0) {
167 if (NULL == (outfp = fopen(*++argv, "wb"))) {
168 perror(*argv);
169 return 1;
170 }
171 argc--;
172 } else if (**argv != '-') {
173 from_stdin = 0;
174 break;
175 } else {
176 usage();
177 }
178 }
179
180 read_experiment_redirect(redirect);
181
182 if (!from_stdin) {
183 if (fofn) {
184 FILE *fofn_fp;
185 char line[8192];
186
187 if (strcmp(fofn, "stdin") == 0)
188 fofn_fp = stdin;
189 else
190 fofn_fp = fopen(fofn, "r");
191
192 if (fofn_fp) {
193 while (fgets(line, 8192, fofn_fp) != NULL) {
194 char *cp;
195 if (cp = strchr(line, '\n'))
196 *cp = 0;
197 if (format == TT_EXP) {
198 infp = open_exp_mfile(line, NULL);
199 } else {
200 infp = open_trace_mfile(line, NULL);
201 }
202 if (NULL == infp) {
203 perror(line);
204 ret = 1;
205 } else {
206 ret |= do_trans(infp, line, outfp, format, good_only,
207 clip_cosmid, fasta_out);
208 mfclose(infp);
209 }
210 }
211 fclose(fofn_fp);
212 }
213 }
214 for (;argc > 0; argc--, argv++) {
215 if (format == TT_EXP) {
216 infp = open_exp_mfile(*argv, NULL);
217 } else {
218 infp = open_trace_mfile(*argv, NULL);
219 }
220 if (NULL == infp) {
221 perror(*argv);
222 ret = 1;
223 } else {
224 ret |= do_trans(infp, *argv, outfp, format, good_only,
225 clip_cosmid, fasta_out);
226 mfclose(infp);
227 }
228 }
229 } else {
230 ret = do_trans(infp, "<stdin>", outfp, format, good_only, clip_cosmid,
231 fasta_out);
232 }
233
234 return ret;
235 }
236