diff srf2fastq/io_lib-1.12.2/io_lib/seqIOPlain.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/srf2fastq/io_lib-1.12.2/io_lib/seqIOPlain.c	Tue Jun 07 17:48:05 2011 -0400
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) Medical Research Council 1994. All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * this copyright and notice appears in all copies.
+ *
+ * This file was written by James Bonfield, Simon Dear, Rodger Staden,
+ * as part of the Staden Package at the MRC Laboratory of Molecular
+ * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
+ *
+ * MRC disclaims all warranties with regard to this software.
+ */
+
+/* 
+  Title:       seqIOPlain
+  
+  File: 	 seqIOPlain.c
+  Purpose:	 IO of plain sequences
+  Last update: Tuesday Jan 15 1991
+  
+  Change log:
+  
+  28.11.90 SD  put undesirables under STLOUIS compilation flag
+  15.01.91 SD  new include file (opp.h)
+  17.09.91 LFW changed STLOUIS compilation flag to SAVE_EDITS
+  and AUTO_CLIP
+  08.09.94 JKB Plain files now also uses the ';<' and ';>' lines.
+  09.09.94 JKB Update to use Read instead of Seq library.
+  01.06.07 JKB Supports single-read fasta files; about time too!
+  */
+
+
+#define LINE_LENGTH 60
+
+/*
+ * This module should be regarded as part of `read' since it is
+ * privy to the internal structure of `Read'.
+ *
+ * This library also requires use of the mach-io code for the endian
+ * independent machine IO.
+ *
+ * Any references to the writing or reading of edited sequences,
+ * or to the bottom strand were added by lfw
+ */
+
+
+
+
+/* ---- Imports ---- */
+
+#include <stdio.h>      /* IMPORT: fopen, fclose, fseek, ftell, fgetc */
+#include <ctype.h>      /* IMPORT: isprint */
+#include <string.h>
+
+#include "io_lib/misc.h"
+#include "io_lib/plain.h"
+#include "io_lib/Read.h"
+#include "io_lib/xalloc.h"
+#include "io_lib/traceType.h"
+
+#include "io_lib/stdio_hack.h"
+/* ---- Constants ---- */
+
+#define BasesPerLine 50 /* For output formatting */
+
+
+/* ---- Exports ---- */
+
+
+/*
+ * Read the plain format sequence from FILE *fp into a Read structure.
+ * All printing characters (as defined by ANSII C `isprint')
+ * are accepted, but `N's are translated to `-'s.
+ *
+ * Returns:
+ *   Read *     - Success, the Read structure read.
+ *   NULLRead   - Failure.
+ */
+Read *fread_pln(FILE *fp) {
+    Read *read = NULLRead;
+    off_t fileLen;
+    int  ch;
+    char *leftc, *rightc, *leftcp, *rightcp;
+    int first = 1;
+
+    /*
+     * Find the length of the file.
+     * Use this as an overestimate of the length of the sequence.
+     */
+    fseek(fp, (off_t) 0, 2);
+    if ((fileLen = ftell(fp)) > INT_MAX /*Was MAXINT2*/)
+	goto bail_out;
+
+    fseek(fp, (off_t) 0, 0);
+    
+    /* Allocate the sequence */
+    if (NULLRead == (read = read_allocate(0, fileLen)))
+	goto bail_out;
+
+    if (NULL == (leftc = (char *)xmalloc(fileLen)))
+	goto bail_out;
+
+    if (NULL == (rightc = (char *)xmalloc(fileLen)))
+	goto bail_out;
+
+    leftcp = leftc;
+    rightcp = rightc;
+
+    /* Read in the bases */
+    
+    read->NBases = 0;
+    read->format = TT_PLN;
+
+    while ((ch = fgetc(fp)) != EOF) {
+	if (ch == '>') {
+	    /* Fasta format file - skip the header and load the first
+	     * fasta sequence only. We don't even attempt to worry about
+	     * multi-sequence file formats for now.
+	     */
+	    if (!first)
+		break;
+
+	    while(ch != '\n' && ch != EOF)
+		ch = fgetc(fp);
+
+	}  else if (ch==';') {
+	    /*
+	     * ;< is left cutoff,
+	     * ;> is right cutoff.
+	     * Any other ';'s we can treat as a comments.
+	     */
+	    ch = fgetc(fp);
+
+	    if (first == 1 && ch != '<' && ch != '>') {
+		int d;
+		char type[5], name[17], line[1024];
+
+		line[0] = ch;
+		fgets(&line[1], 1022, fp);
+
+		if (5 == sscanf(line, "%6d%6d%6d%4c%s",
+				&d, &d, &d, type, name)) {
+		    char * p;
+
+		    if (p = strchr(type, ' '))
+			*p = 0;
+
+		    read->format = trace_type_str2int(type);
+		    read->trace_name = (char *)xmalloc(strlen(name)+1);
+		    if (read->trace_name)
+			strcpy(read->trace_name, name);
+		}
+	    }
+
+	    else if (ch == '<') {
+		ch = fgetc(fp);
+		while (ch != '\n') {
+		    *leftcp++ = ch;
+		    ch = fgetc(fp);
+		}
+	    } else if (ch == '>') {
+		ch = fgetc(fp);
+		while (ch != '\n') {
+		    *rightcp++ = ch;
+		    ch = fgetc(fp);
+		}
+	    } else {
+		while(ch != '\n' && ch != EOF)
+		    ch = fgetc(fp);
+	    }
+        } else if (isprint(ch) && !isspace(ch)) {
+	    read->base[read->NBases++] = ((ch)=='N') ? '-' : (ch);
+	}
+	
+	first = 0;
+    }
+
+    *leftcp = *rightcp = 0;
+
+    read->leftCutoff = strlen(leftc);
+    read->rightCutoff = read->leftCutoff + read->NBases + 1;
+    memmove(&read->base[read->leftCutoff], read->base, read->NBases);
+    memmove(read->base, leftc, read->leftCutoff);
+    memmove(&read->base[read->leftCutoff + read->NBases],
+	    rightc, strlen(rightc));
+
+    read->NBases += read->leftCutoff + strlen(rightc);
+    read->base[read->NBases] = 0;
+
+    xfree(leftc);
+    xfree(rightc);
+    
+    /* SUCCESS */
+    return(read);
+
+    /* FAILURE */
+ bail_out:
+    if (read)
+	read_deallocate(read);
+
+    return NULLRead;
+}
+
+/*
+ * Read the plain format sequence with name `fn' into a Read structure.
+ * All printing characters (as defined by ANSII C `isprint')
+ * are accepted, but `N's are translated to `-'s.
+ *
+ * Returns:
+ *   Read *     - Success, the Read structure read.
+ *   NULLRead   - Failure.
+ */
+Read *read_pln(char *fn) {
+    FILE *fp;
+    Read *read;
+
+    /* Open file */
+    if ((fp = fopen(fn, "r")) == NULL)
+	return NULLRead;
+
+    read = fread_pln(fp);
+    fclose(fp);
+
+    if (read && read->trace_name == NULL &&
+	(read->trace_name = (char *)xmalloc(strlen(fn)+1)))
+	strcpy(read->trace_name, fn);
+
+    return read;
+}
+
+
+/*
+ * Write to a Plain file
+ */
+int fwrite_pln(FILE *fp, Read *read) {
+    int i, err = 0;
+
+    for (i = 0; i < read->NBases; i += LINE_LENGTH)
+        if (-1 == fprintf(fp, "%.*s\n",
+			  read->NBases - i > LINE_LENGTH
+			  ? LINE_LENGTH : read->NBases - i,
+			  &read->base[i]))
+	    err = 1;
+    
+    return err ? -1 : 0;
+}
+
+int write_pln(char *fn, Read *read) {
+    FILE *fp;
+
+    if ((fp = fopen(fn,"w")) == NULL) 
+	return -1;
+
+    if (fwrite_pln(fp, read)) {
+	fclose(fp);
+	return -1;
+    }
+
+    fclose(fp);
+    return 0;
+}
+