comparison srf2fastq/io_lib-1.12.2/io_lib/traceType.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d901c9f41a6a
1 /*
2 * Copyright (c) Medical Research Council 1994. All rights reserved.
3 *
4 * Permission to use, copy, modify and distribute this software and its
5 * documentation for any purpose is hereby granted without fee, provided that
6 * this copyright and notice appears in all copies.
7 *
8 * This file was written by James Bonfield, Simon Dear, Rodger Staden,
9 * as part of the Staden Package at the MRC Laboratory of Molecular
10 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
11 *
12 * MRC disclaims all warranties with regard to this software.
13 */
14
15 /*
16 Title: traceType
17
18 File: traceType.c
19 Purpose: determining trace format
20
21 Last update: 01/09/94
22
23 Change log : Update for use with the Read library.
24 */
25
26 /* ---- Imports ---- */
27
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
31
32 #include "io_lib/stdio_hack.h"
33
34 #include "io_lib/traceType.h"
35 #include "io_lib/Read.h"
36 #include "io_lib/open_trace_file.h"
37
38 #ifdef USE_BIOLIMS
39 #include "spBiolims.h"
40 #endif
41
42 #ifndef isascii
43 #define isascii(c) ((c) >= 0 && (c) <= 0x7f)
44 #endif
45
46 /* ---- Privates ---- */
47 static struct {
48 int type;
49 int offset;
50 char *string;
51 } magics[] = {
52 { TT_SCF, 0, ".scf" } ,
53 { TT_CTF, 1, "\007\375\343\000" } , /* mieg */
54 { TT_ZTR, 0, "\256ZTR\r\n\032\n" } ,
55 { TT_ABI, 0, "ABIF" } ,
56 { TT_ABI, 128, "ABIF" } ,
57 { TT_ALF, 518, "ALF " } ,
58 { TT_SCF, 0, "\234\330\300\000" }, /* Amersham variant */
59 { TT_SFF, 0, ".sff" } ,
60 { TT_EXP, 0, "ID " } ,
61 { TT_ALF, 0, "ALF " } , /* Added by newer alfsplit programs */
62 { TT_ALF, 0, "\021G\021G" } , /* Pharmacia's alfsplit equiv */
63 { TT_ALF, 1546,"X-axis" } /* Good guestimation if all else fails */
64 };
65
/* Number of elements in array A. A must be a real array, not a pointer. */
#define Number(A) ( sizeof(A) / sizeof((A)[0]) )
67
68 /* ---- exported ---- */
69
/*
 * Delete the file named 'fn' (Unix implementation, built on unlink()).
 *
 * Returns the result of unlink(): 0 on success, -1 on failure.
 */
int remove_file(char *fn)
{
    int status;

    status = unlink(fn);
    return status;
}
72
73
74 /*
75 * Determine the trace type for FILE * 'fp'.
76 *
77 * NB - This function should NOT be used when biolims support is required
78 * (as biolims doesn't use files !)
79 *
80 * Returns:
81 * TT_SCF, TT_CTF, TT_ZTR, TT_ABI, TT_ALF, or TT_PLN for success.
82 * TT_UNK for unknown type.
83 * TT_ERR for error.
84 */
85 int fdetermine_trace_type(FILE *fp)
86 {
87 unsigned int i;
88 size_t len;
89 char buf[512];
90 int ps;
91 int acgt;
92 int c;
93
94 /* check magics */
95 for (i = 0 ; i < Number(magics) ; i++) {
96 if (fseek(fp,magics[i].offset,0) == 0) {
97 len = strlen(magics[i].string);
98 if (fread(buf,1,len,fp)==len) {
99 if (strncmp(buf,magics[i].string,len)==0) {
100 return magics[i].type;
101 }
102 }
103 }
104 }
105 fseek(fp, 0, 0);
106
107 /* determine if this is a text file */
108 len = 0; ps = 0; acgt = 0;
109 for (i = 0; i < 512; i++) {
110 if ( ( c = fgetc(fp) ) == EOF ) break;
111 switch(c) {
112 case 'a': case 'c': case 'g': case 't':
113 case 'A': case 'C': case 'G': case 'T':
114 /*YUK! need the next line?*/
115 case 'n': case 'N': case '-':
116 acgt++;
117 default:
118 len++;
119 if ( (isprint(c) && isascii(c)) || isspace(c) ) ps++;
120 }
121 }
122 fseek(fp, 0, 0);
123 /*YUK! 75% of characters printable means text*/
124 if ( 100 * (size_t)ps > 75 * len ) {
125 /*YUK! 75% of printables ACGTN means plain*/
126 if (100 * acgt > 75 * ps) {
127 return TT_PLN;
128 }
129 }
130
131 /* YUK! short files are not traces? */
132 if (len<512) {
133 return TT_UNK;
134 }
135
136 return TT_UNK;
137 }
138
139 /*
140 * Determine the trace type for file 'fn'.
141 *
142 * Returns:
143 * TT_SCF, TT_CTF, TT_ZTR, TT_ABI, TT_ALF, TT_BIO, or TT_PLN for success.
144 * TT_UNK for unknown type.
145 * TT_ERR for error.
146 */
147 int determine_trace_type(char *fn)
148 {
149 FILE *fp;
150 int r;
151
152 #ifdef USE_BIOLIMS
153 if(IS_BIOLIMS_PATH(fn))
154 return TT_BIO;
155 #endif
156
157 if ( (fp = open_trace_file(fn, NULL)) == NULL ) return TT_ERR;
158
159 r = fdetermine_trace_type(fp);
160 fclose(fp);
161
162 return r;
163 }
164
165 /*
166 * Converts a trace type string to an integer.
167 */
168 int trace_type_str2int(char *str) {
169 if (strcmp(str, "SCF") == 0 || strcmp(str, "scf") == 0)
170 return TT_SCF;
171 else if (strcmp(str, "SFF") == 0 || strcmp(str, "sff") == 0)
172 return TT_SFF; /* 454 */
173 else if (strcmp(str, "CTF") == 0 || strcmp(str, "ctf") == 0)
174 return TT_CTF; /* mieg */
175 else if (strcmp(str, "ZTR") == 0 || strcmp(str, "ztr") == 0)
176 return TT_ZTR;
177 else if (strcmp(str, "ZTR1") == 0 || strcmp(str, "ztr1") == 0)
178 return TT_ZTR1;
179 else if (strcmp(str, "ZTR2") == 0 || strcmp(str, "ztr2") == 0)
180 return TT_ZTR2;
181 else if (strcmp(str, "ZTR3") == 0 || strcmp(str, "ztr3") == 0)
182 return TT_ZTR3;
183 else if (strcmp(str, "ABI") == 0 || strcmp(str, "abi") == 0)
184 return TT_ABI;
185 else if (strcmp(str, "ALF") == 0 || strcmp(str, "alf") == 0)
186 return TT_ALF;
187 else if (strcmp(str, "PLN") == 0 || strcmp(str, "pln") == 0)
188 return TT_PLN;
189 else if (strcmp(str, "EXP") == 0 || strcmp(str, "exp") == 0)
190 return TT_EXP;
191 else if (strcmp(str, "BIO") == 0 || strcmp(str, "bio") == 0)
192 return TT_BIO;
193 else if (strcmp(str, "ANYTR") == 0 || strcmp(str, "anytr") == 0)
194 return TT_ANYTR;
195 else
196 return TT_UNK;
197 }
198
199 /*
200 * Converts a trace type integer to a string.
201 */
202 char *trace_type_int2str(int type) {
203 char *t;
204
205 switch(type) {
206 case TT_SCF: t = "SCF"; break;
207 case TT_SFF: t = "SFF"; break; /* 454 */
208 case TT_CTF: t = "CTF"; break; /* mieg */
209 case TT_ZTR: t = "ZTR";break;
210 case TT_ZTR1: t = "ZTR1";break;
211 case TT_ZTR2: t = "ZTR2";break;
212 case TT_ZTR3: t = "ZTR3";break;
213 case TT_ABI: t = "ABI"; break;
214 case TT_ALF: t = "ALF"; break;
215 case TT_PLN: t = "PLN"; break;
216 case TT_EXP: t = "EXP"; break;
217 case TT_BIO: t = "BIO"; break;
218 case TT_ANYTR: t="ANYTR"; break;
219 default:
220 case TT_UNK: t = "UNK"; break;
221 }
222
223 return t;
224 }
225
226 /*
227 * Returns a statically declared string containing a 3 character
228 * identifier for the trace type of this file.
229 * "ERR" represents error, and "UNK" for unknown.
230 * Successful values are "SCF", "ABI", "ALF", "PLN", "CTF", "ZTR" and "BIO".
231 */
232 char *trace_type_str(char *traceName)
233 {
234 int t;
235
236 if ((t = determine_trace_type(traceName)) == TT_ERR)
237 return "ERR";
238 else
239 return trace_type_int2str(t);
240 }