Mercurial > repos > dawe > srf2fastq
diff srf2fastq/io_lib-1.12.2/io_lib/seqIOPlain.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/srf2fastq/io_lib-1.12.2/io_lib/seqIOPlain.c Tue Jun 07 17:48:05 2011 -0400 @@ -0,0 +1,263 @@ +/* + * Copyright (c) Medical Research Council 1994. All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation for any purpose is hereby granted without fee, provided that + * this copyright and notice appears in all copies. + * + * This file was written by James Bonfield, Simon Dear, Rodger Staden, + * as part of the Staden Package at the MRC Laboratory of Molecular + * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom. + * + * MRC disclaims all warranties with regard to this software. + */ + +/* + Title: seqIOPlain + + File: seqIOPlain.c + Purpose: IO of plain sequences + Last update: Tuesday Jan 15 1991 + + Change log: + + 28.11.90 SD put undesirables under STLOUIS compilation flag + 15.01.91 SD new include file (opp.h) + 17.09.91 LFW changed STLOUIS compilation flag to SAVE_EDITS + and AUTO_CLIP + 08.09.94 JKB Plain files now also uses the ';<' and ';>' lines. + 09.09.94 JKB Update to use Read instead of Seq library. + 01.06.07 JKB Supports single-read fasta files; about time too! + */ + + +#define LINE_LENGTH 60 + +/* + * This module should be regarded as part of `read' since it is + * privy to the internal structure of `Read'. + * + * This library also requires use of the mach-io code for the endian + * independent machine IO. + * + * Any references to the writing or reading of edited sequences, + * or to the bottom strand were added by lfw + */ + + + + +/* ---- Imports ---- */ + +#include <stdio.h> /* IMPORT: fopen, fclose, fseek, ftell, fgetc */ +#include <ctype.h> /* IMPORT: isprint */ +#include <string.h> + +#include "io_lib/misc.h" +#include "io_lib/plain.h" +#include "io_lib/Read.h" +#include "io_lib/xalloc.h" +#include "io_lib/traceType.h" + +#include "io_lib/stdio_hack.h" +/* ---- Constants ---- */ + +#define BasesPerLine 50 /* For output formatting */ + + +/* ---- Exports ---- */ + + +/* + * Read the plain format sequence from FILE *fp into a Read structure. + * All printing characters (as defined by ANSII C `isprint') + * are accepted, but `N's are translated to `-'s. + * + * Returns: + * Read * - Success, the Read structure read. + * NULLRead - Failure. + */ +Read *fread_pln(FILE *fp) { + Read *read = NULLRead; + off_t fileLen; + int ch; + char *leftc, *rightc, *leftcp, *rightcp; + int first = 1; + + /* + * Find the length of the file. + * Use this as an overestimate of the length of the sequence. + */ + fseek(fp, (off_t) 0, 2); + if ((fileLen = ftell(fp)) > INT_MAX /*Was MAXINT2*/) + goto bail_out; + + fseek(fp, (off_t) 0, 0); + + /* Allocate the sequence */ + if (NULLRead == (read = read_allocate(0, fileLen))) + goto bail_out; + + if (NULL == (leftc = (char *)xmalloc(fileLen))) + goto bail_out; + + if (NULL == (rightc = (char *)xmalloc(fileLen))) + goto bail_out; + + leftcp = leftc; + rightcp = rightc; + + /* Read in the bases */ + + read->NBases = 0; + read->format = TT_PLN; + + while ((ch = fgetc(fp)) != EOF) { + if (ch == '>') { + /* Fasta format file - skip the header and load the first + * fasta sequence only. We don't even attempt to worry about + * multi-sequence file formats for now. + */ + if (!first) + break; + + while(ch != '\n' && ch != EOF) + ch = fgetc(fp); + + } else if (ch==';') { + /* + * ;< is left cutoff, + * ;> is right cutoff. + * Any other ';'s we can treat as a comments. + */ + ch = fgetc(fp); + + if (first == 1 && ch != '<' && ch != '>') { + int d; + char type[5], name[17], line[1024]; + + line[0] = ch; + fgets(&line[1], 1022, fp); + + if (5 == sscanf(line, "%6d%6d%6d%4c%s", + &d, &d, &d, type, name)) { + char * p; + + if (p = strchr(type, ' ')) + *p = 0; + + read->format = trace_type_str2int(type); + read->trace_name = (char *)xmalloc(strlen(name)+1); + if (read->trace_name) + strcpy(read->trace_name, name); + } + } + + else if (ch == '<') { + ch = fgetc(fp); + while (ch != '\n') { + *leftcp++ = ch; + ch = fgetc(fp); + } + } else if (ch == '>') { + ch = fgetc(fp); + while (ch != '\n') { + *rightcp++ = ch; + ch = fgetc(fp); + } + } else { + while(ch != '\n' && ch != EOF) + ch = fgetc(fp); + } + } else if (isprint(ch) && !isspace(ch)) { + read->base[read->NBases++] = ((ch)=='N') ? '-' : (ch); + } + + first = 0; + } + + *leftcp = *rightcp = 0; + + read->leftCutoff = strlen(leftc); + read->rightCutoff = read->leftCutoff + read->NBases + 1; + memmove(&read->base[read->leftCutoff], read->base, read->NBases); + memmove(read->base, leftc, read->leftCutoff); + memmove(&read->base[read->leftCutoff + read->NBases], + rightc, strlen(rightc)); + + read->NBases += read->leftCutoff + strlen(rightc); + read->base[read->NBases] = 0; + + xfree(leftc); + xfree(rightc); + + /* SUCCESS */ + return(read); + + /* FAILURE */ + bail_out: + if (read) + read_deallocate(read); + + return NULLRead; +} + +/* + * Read the plain format sequence with name `fn' into a Read structure. + * All printing characters (as defined by ANSII C `isprint') + * are accepted, but `N's are translated to `-'s. + * + * Returns: + * Read * - Success, the Read structure read. + * NULLRead - Failure. + */ +Read *read_pln(char *fn) { + FILE *fp; + Read *read; + + /* Open file */ + if ((fp = fopen(fn, "r")) == NULL) + return NULLRead; + + read = fread_pln(fp); + fclose(fp); + + if (read && read->trace_name == NULL && + (read->trace_name = (char *)xmalloc(strlen(fn)+1))) + strcpy(read->trace_name, fn); + + return read; +} + + +/* + * Write to a Plain file + */ +int fwrite_pln(FILE *fp, Read *read) { + int i, err = 0; + + for (i = 0; i < read->NBases; i += LINE_LENGTH) + if (-1 == fprintf(fp, "%.*s\n", + read->NBases - i > LINE_LENGTH + ? LINE_LENGTH : read->NBases - i, + &read->base[i])) + err = 1; + + return err ? -1 : 0; +} + +int write_pln(char *fn, Read *read) { + FILE *fp; + + if ((fp = fopen(fn,"w")) == NULL) + return -1; + + if (fwrite_pln(fp, read)) { + fclose(fp); + return -1; + } + + fclose(fp); + return 0; +} +