Mercurial > repos > dawe > srf2fastq
comparison srf2fastq/io_lib-1.12.2/io_lib/traceType.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d901c9f41a6a |
---|---|
1 /* | |
2 * Copyright (c) Medical Research Council 1994. All rights reserved. | |
3 * | |
4 * Permission to use, copy, modify and distribute this software and its | |
5 * documentation for any purpose is hereby granted without fee, provided that | |
6 * this copyright and notice appears in all copies. | |
7 * | |
8 * This file was written by James Bonfield, Simon Dear, Rodger Staden, | |
9 * as part of the Staden Package at the MRC Laboratory of Molecular | |
10 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom. | |
11 * | |
12 * MRC disclaims all warranties with regard to this software. | |
13 */ | |
14 | |
15 /* | |
16 Title: traceType | |
17 | |
18 File: traceType.c | |
19 Purpose: determining trace format | |
20 | |
21 Last update: 01/09/94 | |
22 | |
23 Change log : Update for use with the Read library. | |
24 */ | |
25 | |
26 /* ---- Imports ---- */ | |
27 | |
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
31 | |
32 #include "io_lib/stdio_hack.h" | |
33 | |
34 #include "io_lib/traceType.h" | |
35 #include "io_lib/Read.h" | |
36 #include "io_lib/open_trace_file.h" | |
37 | |
38 #ifdef USE_BIOLIMS | |
39 #include "spBiolims.h" | |
40 #endif | |
41 | |
42 #ifndef isascii | |
43 #define isascii(c) ((c) >= 0 && (c) <= 0x7f) | |
44 #endif | |
45 | |
46 /* ---- Privates ---- */ | |
47 static struct { | |
48 int type; | |
49 int offset; | |
50 char *string; | |
51 } magics[] = { | |
52 { TT_SCF, 0, ".scf" } , | |
53 { TT_CTF, 1, "\007\375\343\000" } , /* mieg */ | |
54 { TT_ZTR, 0, "\256ZTR\r\n\032\n" } , | |
55 { TT_ABI, 0, "ABIF" } , | |
56 { TT_ABI, 128, "ABIF" } , | |
57 { TT_ALF, 518, "ALF " } , | |
58 { TT_SCF, 0, "\234\330\300\000" }, /* Amersham variant */ | |
59 { TT_SFF, 0, ".sff" } , | |
60 { TT_EXP, 0, "ID " } , | |
61 { TT_ALF, 0, "ALF " } , /* Added by newer alfsplit programs */ | |
62 { TT_ALF, 0, "\021G\021G" } , /* Pharmacia's alfsplit equiv */ | |
63 { TT_ALF, 1546,"X-axis" } /* Good guestimation if all else fails */ | |
64 }; | |
65 | |
66 #define Number(A) ( sizeof(A) / sizeof((A)[0]) ) | |
67 | |
68 /* ---- exported ---- */ | |
69 | |
/*
 * Delete the file named 'fn'.
 *
 * Unix specific: the work is done by unlink(), and its return value is
 * handed back to the caller unchanged.
 */
int remove_file(char *fn)
{
    int status = unlink(fn);

    return status;
}
72 | |
73 | |
74 /* | |
75 * Determine the trace type for FILE * 'fp'. | |
76 * | |
77 * NB - This function should NOT be used when biolims support is required | |
78 * (as biolims doesn't use files !) | |
79 * | |
80 * Returns: | |
81 * TT_SCF, TT_CTF, TT_ZTR, TT_ABI, TT_ALF, or TT_PLN for success. | |
82 * TT_UNK for unknown type. | |
83 * TT_ERR for error. | |
84 */ | |
85 int fdetermine_trace_type(FILE *fp) | |
86 { | |
87 unsigned int i; | |
88 size_t len; | |
89 char buf[512]; | |
90 int ps; | |
91 int acgt; | |
92 int c; | |
93 | |
94 /* check magics */ | |
95 for (i = 0 ; i < Number(magics) ; i++) { | |
96 if (fseek(fp,magics[i].offset,0) == 0) { | |
97 len = strlen(magics[i].string); | |
98 if (fread(buf,1,len,fp)==len) { | |
99 if (strncmp(buf,magics[i].string,len)==0) { | |
100 return magics[i].type; | |
101 } | |
102 } | |
103 } | |
104 } | |
105 fseek(fp, 0, 0); | |
106 | |
107 /* determine if this is a text file */ | |
108 len = 0; ps = 0; acgt = 0; | |
109 for (i = 0; i < 512; i++) { | |
110 if ( ( c = fgetc(fp) ) == EOF ) break; | |
111 switch(c) { | |
112 case 'a': case 'c': case 'g': case 't': | |
113 case 'A': case 'C': case 'G': case 'T': | |
114 /*YUK! need the next line?*/ | |
115 case 'n': case 'N': case '-': | |
116 acgt++; | |
117 default: | |
118 len++; | |
119 if ( (isprint(c) && isascii(c)) || isspace(c) ) ps++; | |
120 } | |
121 } | |
122 fseek(fp, 0, 0); | |
123 /*YUK! 75% of characters printable means text*/ | |
124 if ( 100 * (size_t)ps > 75 * len ) { | |
125 /*YUK! 75% of printables ACGTN means plain*/ | |
126 if (100 * acgt > 75 * ps) { | |
127 return TT_PLN; | |
128 } | |
129 } | |
130 | |
131 /* YUK! short files are not traces? */ | |
132 if (len<512) { | |
133 return TT_UNK; | |
134 } | |
135 | |
136 return TT_UNK; | |
137 } | |
138 | |
139 /* | |
140 * Determine the trace type for file 'fn'. | |
141 * | |
142 * Returns: | |
143 * TT_SCF, TT_CTF, TT_ZTR, TT_ABI, TT_ALF, TT_BIO, or TT_PLN for success. | |
144 * TT_UNK for unknown type. | |
145 * TT_ERR for error. | |
146 */ | |
147 int determine_trace_type(char *fn) | |
148 { | |
149 FILE *fp; | |
150 int r; | |
151 | |
152 #ifdef USE_BIOLIMS | |
153 if(IS_BIOLIMS_PATH(fn)) | |
154 return TT_BIO; | |
155 #endif | |
156 | |
157 if ( (fp = open_trace_file(fn, NULL)) == NULL ) return TT_ERR; | |
158 | |
159 r = fdetermine_trace_type(fp); | |
160 fclose(fp); | |
161 | |
162 return r; | |
163 } | |
164 | |
165 /* | |
166 * Converts a trace type string to an integer. | |
167 */ | |
168 int trace_type_str2int(char *str) { | |
169 if (strcmp(str, "SCF") == 0 || strcmp(str, "scf") == 0) | |
170 return TT_SCF; | |
171 else if (strcmp(str, "SFF") == 0 || strcmp(str, "sff") == 0) | |
172 return TT_SFF; /* 454 */ | |
173 else if (strcmp(str, "CTF") == 0 || strcmp(str, "ctf") == 0) | |
174 return TT_CTF; /* mieg */ | |
175 else if (strcmp(str, "ZTR") == 0 || strcmp(str, "ztr") == 0) | |
176 return TT_ZTR; | |
177 else if (strcmp(str, "ZTR1") == 0 || strcmp(str, "ztr1") == 0) | |
178 return TT_ZTR1; | |
179 else if (strcmp(str, "ZTR2") == 0 || strcmp(str, "ztr2") == 0) | |
180 return TT_ZTR2; | |
181 else if (strcmp(str, "ZTR3") == 0 || strcmp(str, "ztr3") == 0) | |
182 return TT_ZTR3; | |
183 else if (strcmp(str, "ABI") == 0 || strcmp(str, "abi") == 0) | |
184 return TT_ABI; | |
185 else if (strcmp(str, "ALF") == 0 || strcmp(str, "alf") == 0) | |
186 return TT_ALF; | |
187 else if (strcmp(str, "PLN") == 0 || strcmp(str, "pln") == 0) | |
188 return TT_PLN; | |
189 else if (strcmp(str, "EXP") == 0 || strcmp(str, "exp") == 0) | |
190 return TT_EXP; | |
191 else if (strcmp(str, "BIO") == 0 || strcmp(str, "bio") == 0) | |
192 return TT_BIO; | |
193 else if (strcmp(str, "ANYTR") == 0 || strcmp(str, "anytr") == 0) | |
194 return TT_ANYTR; | |
195 else | |
196 return TT_UNK; | |
197 } | |
198 | |
199 /* | |
200 * Converts a trace type integer to a string. | |
201 */ | |
202 char *trace_type_int2str(int type) { | |
203 char *t; | |
204 | |
205 switch(type) { | |
206 case TT_SCF: t = "SCF"; break; | |
207 case TT_SFF: t = "SFF"; break; /* 454 */ | |
208 case TT_CTF: t = "CTF"; break; /* mieg */ | |
209 case TT_ZTR: t = "ZTR";break; | |
210 case TT_ZTR1: t = "ZTR1";break; | |
211 case TT_ZTR2: t = "ZTR2";break; | |
212 case TT_ZTR3: t = "ZTR3";break; | |
213 case TT_ABI: t = "ABI"; break; | |
214 case TT_ALF: t = "ALF"; break; | |
215 case TT_PLN: t = "PLN"; break; | |
216 case TT_EXP: t = "EXP"; break; | |
217 case TT_BIO: t = "BIO"; break; | |
218 case TT_ANYTR: t="ANYTR"; break; | |
219 default: | |
220 case TT_UNK: t = "UNK"; break; | |
221 } | |
222 | |
223 return t; | |
224 } | |
225 | |
226 /* | |
227 * Returns a statically declared string containing a 3 character | |
228 * identifier for the trace type of this file. | |
229 * "ERR" represents error, and "UNK" for unknown. | |
230 * Successful values are "SCF", "ABI", "ALF", "PLN", "CTF", "ZTR" and "BIO". | |
231 */ | |
232 char *trace_type_str(char *traceName) | |
233 { | |
234 int t; | |
235 | |
236 if ((t = determine_trace_type(traceName)) == TT_ERR) | |
237 return "ERR"; | |
238 else | |
239 return trace_type_int2str(t); | |
240 } |