Mercurial > repos > dawe > srf2fastq
diff srf2fastq/io_lib-1.12.2/io_lib/traceType.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/srf2fastq/io_lib-1.12.2/io_lib/traceType.c Tue Jun 07 17:48:05 2011 -0400 @@ -0,0 +1,240 @@ +/* + * Copyright (c) Medical Research Council 1994. All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation for any purpose is hereby granted without fee, provided that + * this copyright and notice appears in all copies. + * + * This file was written by James Bonfield, Simon Dear, Rodger Staden, + * as part of the Staden Package at the MRC Laboratory of Molecular + * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom. + * + * MRC disclaims all warranties with regard to this software. + */ + +/* + Title: traceType + + File: traceType.c + Purpose: determining trace format + + Last update: 01/09/94 + + Change log : Update for use with the Read library. +*/ + +/* ---- Imports ---- */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "io_lib/stdio_hack.h" + +#include "io_lib/traceType.h" +#include "io_lib/Read.h" +#include "io_lib/open_trace_file.h" + +#ifdef USE_BIOLIMS +#include "spBiolims.h" +#endif + +#ifndef isascii +#define isascii(c) ((c) >= 0 && (c) <= 0x7f) +#endif + +/* ---- Privates ---- */ +static struct { + int type; + int offset; + char *string; +} magics[] = { + { TT_SCF, 0, ".scf" } , + { TT_CTF, 1, "\007\375\343\000" } , /* mieg */ + { TT_ZTR, 0, "\256ZTR\r\n\032\n" } , + { TT_ABI, 0, "ABIF" } , + { TT_ABI, 128, "ABIF" } , + { TT_ALF, 518, "ALF " } , + { TT_SCF, 0, "\234\330\300\000" }, /* Amersham variant */ + { TT_SFF, 0, ".sff" } , + { TT_EXP, 0, "ID " } , + { TT_ALF, 0, "ALF " } , /* Added by newer alfsplit programs */ + { TT_ALF, 0, "\021G\021G" } , /* Pharmacia's alfsplit equiv */ + { TT_ALF, 1546,"X-axis" } /* Good guestimation if all else fails */ +}; + +#define Number(A) ( sizeof(A) / sizeof((A)[0]) ) + +/* ---- exported ---- */ + +/* unix specific file deletion routine */ +int remove_file(char *fn) { return unlink(fn); } + + +/* + * Determine the trace type for FILE * 'fp'. + * + * NB - This function should NOT be used when biolims support is required + * (as biolims doesn't use files !) + * + * Returns: + * TT_SCF, TT_CTF, TT_ZTR, TT_ABI, TT_ALF, or TT_PLN for success. + * TT_UNK for unknown type. + * TT_ERR for error. + */ +int fdetermine_trace_type(FILE *fp) +{ + unsigned int i; + size_t len; + char buf[512]; + int ps; + int acgt; + int c; + + /* check magics */ + for (i = 0 ; i < Number(magics) ; i++) { + if (fseek(fp,magics[i].offset,0) == 0) { + len = strlen(magics[i].string); + if (fread(buf,1,len,fp)==len) { + if (strncmp(buf,magics[i].string,len)==0) { + return magics[i].type; + } + } + } + } + fseek(fp, 0, 0); + + /* determine if this is a text file */ + len = 0; ps = 0; acgt = 0; + for (i = 0; i < 512; i++) { + if ( ( c = fgetc(fp) ) == EOF ) break; + switch(c) { + case 'a': case 'c': case 'g': case 't': + case 'A': case 'C': case 'G': case 'T': + /*YUK! need the next line?*/ + case 'n': case 'N': case '-': + acgt++; + default: + len++; + if ( (isprint(c) && isascii(c)) || isspace(c) ) ps++; + } + } + fseek(fp, 0, 0); + /*YUK! 75% of characters printable means text*/ + if ( 100 * (size_t)ps > 75 * len ) { + /*YUK! 75% of printables ACGTN means plain*/ + if (100 * acgt > 75 * ps) { + return TT_PLN; + } + } + + /* YUK! short files are not traces? */ + if (len<512) { + return TT_UNK; + } + + return TT_UNK; +} + +/* + * Determine the trace type for file 'fn'. + * + * Returns: + * TT_SCF, TT_CTF, TT_ZTR, TT_ABI, TT_ALF, TT_BIO, or TT_PLN for success. + * TT_UNK for unknown type. + * TT_ERR for error. + */ +int determine_trace_type(char *fn) +{ + FILE *fp; + int r; + +#ifdef USE_BIOLIMS + if(IS_BIOLIMS_PATH(fn)) + return TT_BIO; +#endif + + if ( (fp = open_trace_file(fn, NULL)) == NULL ) return TT_ERR; + + r = fdetermine_trace_type(fp); + fclose(fp); + + return r; +} + +/* + * Converts a trace type string to an integer. + */ +int trace_type_str2int(char *str) { + if (strcmp(str, "SCF") == 0 || strcmp(str, "scf") == 0) + return TT_SCF; + else if (strcmp(str, "SFF") == 0 || strcmp(str, "sff") == 0) + return TT_SFF; /* 454 */ + else if (strcmp(str, "CTF") == 0 || strcmp(str, "ctf") == 0) + return TT_CTF; /* mieg */ + else if (strcmp(str, "ZTR") == 0 || strcmp(str, "ztr") == 0) + return TT_ZTR; + else if (strcmp(str, "ZTR1") == 0 || strcmp(str, "ztr1") == 0) + return TT_ZTR1; + else if (strcmp(str, "ZTR2") == 0 || strcmp(str, "ztr2") == 0) + return TT_ZTR2; + else if (strcmp(str, "ZTR3") == 0 || strcmp(str, "ztr3") == 0) + return TT_ZTR3; + else if (strcmp(str, "ABI") == 0 || strcmp(str, "abi") == 0) + return TT_ABI; + else if (strcmp(str, "ALF") == 0 || strcmp(str, "alf") == 0) + return TT_ALF; + else if (strcmp(str, "PLN") == 0 || strcmp(str, "pln") == 0) + return TT_PLN; + else if (strcmp(str, "EXP") == 0 || strcmp(str, "exp") == 0) + return TT_EXP; + else if (strcmp(str, "BIO") == 0 || strcmp(str, "bio") == 0) + return TT_BIO; + else if (strcmp(str, "ANYTR") == 0 || strcmp(str, "anytr") == 0) + return TT_ANYTR; + else + return TT_UNK; +} + +/* + * Converts a trace type integer to a string. + */ +char *trace_type_int2str(int type) { + char *t; + + switch(type) { + case TT_SCF: t = "SCF"; break; + case TT_SFF: t = "SFF"; break; /* 454 */ + case TT_CTF: t = "CTF"; break; /* mieg */ + case TT_ZTR: t = "ZTR";break; + case TT_ZTR1: t = "ZTR1";break; + case TT_ZTR2: t = "ZTR2";break; + case TT_ZTR3: t = "ZTR3";break; + case TT_ABI: t = "ABI"; break; + case TT_ALF: t = "ALF"; break; + case TT_PLN: t = "PLN"; break; + case TT_EXP: t = "EXP"; break; + case TT_BIO: t = "BIO"; break; + case TT_ANYTR: t="ANYTR"; break; + default: + case TT_UNK: t = "UNK"; break; + } + + return t; +} + +/* + * Returns a statically declared string containing a 3 character + * identifier for the trace type of this file. + * "ERR" represents error, and "UNK" for unknown. + * Successful values are "SCF", "ABI", "ALF", "PLN", "CTF", "ZTR" and "BIO". + */ +char *trace_type_str(char *traceName) +{ + int t; + + if ((t = determine_trace_type(traceName)) == TT_ERR) + return "ERR"; + else + return trace_type_int2str(t); +}