diff srf2fastq/io_lib-1.12.2/io_lib/read_scf.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/srf2fastq/io_lib-1.12.2/io_lib/read_scf.c	Tue Jun 07 17:48:05 2011 -0400
@@ -0,0 +1,448 @@
+/*
+ * Copyright (c) Medical Research Council 1994. All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * this copyright and notice appears in all copies.
+ *
+ * This file was written by James Bonfield, Simon Dear, Rodger Staden,
+ * as part of the Staden Package at the MRC Laboratory of Molecular
+ * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
+ *
+ * MRC disclaims all warranties with regard to this software.
+ */
+
+/* 
+  Title:       read_scf.c
+  
+  Purpose:	 read IO of Standard Chromatogram Format sequences
+  Last update:   August 18 1994
+  
+  Change log:
+  4 Feb 1992,  Now draft proposal version 2
+  20 Feb 1992, Grab info from comment lines
+  19 Aug 1992, If SCF file has clip information, don't clip automatically
+  10 Nov 1992  SCF comments now stored in seq data structure
+  18 Aug 1994  Renamed from  ReadIOSCF.c; now purely SCF IO (no Seq structs)
+
+*/
+
+/* ---- Imports ---- */
+
+#include <ctype.h>
+#include <stdio.h>    /* IMPORT: fopen, fclose, fseek, ftell, fgetc,
+			 EOF */
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "io_lib/mach-io.h"
+#include "io_lib/xalloc.h"
+#include "io_lib/compress.h"
+#include "io_lib/Read.h"
+
+#include "io_lib/stdio_hack.h"
+#include "io_lib/scf.h"      /* SCF structures */
+
+
+/* SunOS4 has it's definitions in unistd, which we won't include for compat. */
+#ifndef SEEK_SET
+#define SEEK_SET 0
+#define SEEK_CUR 1
+#define SEEK_END 2
+#endif
+
+/* ---- Exported functions ---- */
+
+int read_scf_header(FILE *fp, Header *h)
+{
+    int i;
+
+    if (be_read_int_4(fp,&h->magic_number)==False)        return -1;
+
+    if (h->magic_number != SCF_MAGIC)
+	return -1;
+
+    if (be_read_int_4(fp,&h->samples)==False)             return -1;
+    if (be_read_int_4(fp,&h->samples_offset)==False)      return -1;
+    if (be_read_int_4(fp,&h->bases)==False)               return -1;
+    if (be_read_int_4(fp,&h->bases_left_clip)==False)     return -1;
+    if (be_read_int_4(fp,&h->bases_right_clip)==False)    return -1;
+    if (be_read_int_4(fp,&h->bases_offset)==False)        return -1;
+    if (be_read_int_4(fp,&h->comments_size)==False)       return -1;
+    if (be_read_int_4(fp,&h->comments_offset)==False)     return -1;
+    if (fread(&h->version[0],sizeof(h->version),1,fp)!=1) return -1;
+    if (be_read_int_4(fp,&h->sample_size)==False)         return -1;
+    if (be_read_int_4(fp,&h->code_set)==False)            return -1;
+    if (be_read_int_4(fp,&h->private_size)==False)        return -1;
+    if (be_read_int_4(fp,&h->private_offset)==False)      return -1;
+    for (i=0;i<18;i++)
+	if (be_read_int_4(fp,&h->spare[i])==False)        return -1;
+    
+    return 0;
+}
+
+
+int read_scf_sample1(FILE *fp, Samples1 *s)
+{
+    uint_1 buf[4];
+
+    if (4 != fread(buf, 1, 4, fp)) return -1;
+    s->sample_A = buf[0];
+    s->sample_C = buf[1];
+    s->sample_G = buf[2];
+    s->sample_T = buf[3];
+
+/*
+    if (1 != fread(s, 4, 1, fp)) return -1;
+*/
+
+    return 0;
+}
+
+
+int read_scf_sample2(FILE *fp, Samples2 *s)
+{
+    uint_2 buf[4];
+
+    if (4 != fread(buf, 2, 4, fp)) return -1;
+    s->sample_A = be_int2(buf[0]);
+    s->sample_C = be_int2(buf[1]);
+    s->sample_G = be_int2(buf[2]);
+    s->sample_T = be_int2(buf[3]);
+    
+    return 0;
+}
+
+int read_scf_samples1(FILE *fp, Samples1 *s, size_t num_samples) {
+    size_t i;
+
+    for (i = 0; i < num_samples; i++) {
+	if (-1 == read_scf_sample1(fp, &(s[i])))
+	    return -1;
+    }
+
+    return 0;
+}
+
+
+int read_scf_samples2(FILE *fp, Samples2 *s, size_t num_samples) {
+    size_t i;
+
+    for (i = 0; i < num_samples; i++) {
+	if (-1 == read_scf_sample2(fp, &(s[i])))
+	    return -1;
+    }
+
+    return 0;
+}
+
+
+int read_scf_samples32(FILE *fp, Samples2 *s, size_t num_samples) {
+    size_t i;
+    uint2 *samples_out;
+
+    /* version to read delta delta data in 2 bytes */
+
+    if ( ! (samples_out = (uint2 *)xmalloc((num_samples+1) * 
+					    sizeof(uint2)))) {
+	return -1;
+    }
+
+
+    if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1;
+#ifdef SP_LITTLE_ENDIAN
+    for (i = 0; i < num_samples; i++) {
+	samples_out[i] = be_int2(samples_out[i]);
+    }
+#endif
+    scf_delta_samples2 ( samples_out, num_samples, 0);
+    for (i = 0; i < num_samples; i++) {
+	(&s[i])->sample_A = samples_out[i];
+    }
+
+    if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1;
+#ifdef SP_LITTLE_ENDIAN
+    for (i = 0; i < num_samples; i++) {
+	samples_out[i] = be_int2(samples_out[i]);
+    }
+#endif
+    scf_delta_samples2 ( samples_out, num_samples, 0);
+    for (i = 0; i < num_samples; i++) {
+	(&s[i])->sample_C = samples_out[i];
+    }
+
+    if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1;
+#ifdef SP_LITTLE_ENDIAN
+    for (i = 0; i < num_samples; i++) {
+	samples_out[i] = be_int2(samples_out[i]);
+    }
+#endif
+    scf_delta_samples2 ( samples_out, num_samples, 0);
+    for (i = 0; i < num_samples; i++) {
+	(&s[i])->sample_G = samples_out[i];
+    }
+
+    if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1;
+#ifdef SP_LITTLE_ENDIAN
+    for (i = 0; i < num_samples; i++) {
+	samples_out[i] = be_int2(samples_out[i]);
+    }
+#endif
+    scf_delta_samples2 ( samples_out, num_samples, 0);
+    for (i = 0; i < num_samples; i++) {
+	(&s[i])->sample_T = samples_out[i];
+    }
+    xfree(samples_out);
+    return 0;
+}
+
+int read_scf_samples31(FILE *fp, Samples1 *s, size_t num_samples) {
+    size_t i;
+    int1 *samples_out;
+
+    /* version to read delta delta data in 1 byte */
+
+    if ( ! (samples_out = (int1 *)xmalloc((num_samples+1) * 
+					    sizeof(int1)))) {
+	return -1;
+    }
+
+    if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1;
+    scf_delta_samples1 ( samples_out, num_samples, 0);
+    for (i = 0; i < num_samples; i++) {
+	(&s[i])->sample_A = samples_out[i];
+    }
+
+    if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1;
+    scf_delta_samples1 ( samples_out, num_samples, 0);
+    for (i = 0; i < num_samples; i++) {
+	(&s[i])->sample_C = samples_out[i];
+    }
+
+    if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1;
+    scf_delta_samples1 ( samples_out, num_samples, 0);
+    for (i = 0; i < num_samples; i++) {
+	(&s[i])->sample_G = samples_out[i];
+    }
+
+    if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1;
+    scf_delta_samples1 ( samples_out, num_samples, 0);
+    for (i = 0; i < num_samples; i++) {
+	(&s[i])->sample_T = samples_out[i];
+    }
+
+    xfree(samples_out);
+    return 0;
+}
+
+int read_scf_base(FILE *fp, Bases *b)
+{
+    uint_1 buf[12];
+
+    if (1 != fread(buf, 12, 1, fp)) return -1;
+    b->peak_index = be_int4(((uint_4 *)buf)[0]);
+    b->prob_A = buf[4];
+    b->prob_C = buf[5];
+    b->prob_G = buf[6];
+    b->prob_T = buf[7];
+    b->base   = buf[8];
+    b->spare[0] = buf[9];
+    b->spare[1] = buf[10];
+    b->spare[2] = buf[11];
+
+    return 0;
+}
+
+
+int read_scf_bases(FILE *fp, Bases *b, size_t num_bases) {
+    size_t i;
+
+    for (i = 0; i < num_bases; i++) {
+	if (-1 == read_scf_base(fp, &(b[i])))
+	    return -1;
+    }
+
+    return 0;
+}
+
+int read_scf_bases3(FILE *fp, Bases *b, size_t num_bases)
+{
+    size_t i;
+    uint_4 *buf4;
+    uint_1 *buf1;
+
+    if (NULL == (buf4 = (uint_4 *)xmalloc(1 + 4 * num_bases)))
+	return -1;
+
+    if (NULL == (buf1 = (uint_1 *)xmalloc(1 + 8 * num_bases))) {
+	xfree(buf4);
+	return -1;
+    }
+
+    if (num_bases != fread(buf4, 4, num_bases, fp)) return -1;
+    for (i=0; i < num_bases; i++)
+	(&b[i])->peak_index = be_int4(buf4[i]);
+
+    if (8 * num_bases != fread(buf1, 1, 8 * num_bases, fp)) return -1;
+
+    for (i=0; i < num_bases; i++) {
+	(&b[i])->prob_A   = buf1[i];
+	(&b[i])->prob_C   = buf1[i+num_bases];
+	(&b[i])->prob_G   = buf1[i+2*num_bases];
+	(&b[i])->prob_T   = buf1[i+3*num_bases];
+	(&b[i])->base     = buf1[i+4*num_bases];
+	(&b[i])->spare[0] = buf1[i+5*num_bases];
+	(&b[i])->spare[1] = buf1[i+6*num_bases];
+	(&b[i])->spare[2] = buf1[i+7*num_bases];
+    }
+
+    xfree(buf4);
+    xfree(buf1);
+
+    return 0;
+}
+
+
+
+int read_scf_comment(FILE *fp, Comments *c, size_t s)
+{
+    if (fread(c, 1, s, fp) != s) return -1;
+
+    return 0;
+}
+
+
+/*
+ * Read the SCF format sequence from FILE *fp into a 'scf' structure.
+ * A NULL result indicates failure.
+ */
+Scf *fread_scf(FILE *fp) {
+    Scf *scf;
+    Header h;
+    int err;
+    float scf_version;
+    int sections = read_sections(0);
+
+    /* Read header */
+    if (read_scf_header(fp, &h) == -1) {
+	return NULL;
+    }
+
+    /* Allocate memory */
+    if (NULL == (scf = scf_allocate(h.samples, h.sample_size,
+				    h.bases, h.comments_size,
+				    h.private_size))) 
+	return NULL;
+
+    /* fake things for older style SCF -- SD */
+    if (h.sample_size != 1 && h.sample_size != 2) h.sample_size = 1;
+
+    scf_version = scf_version_str2float(h.version);
+
+    memcpy(&scf->header, &h, sizeof(Header));
+
+    if (sections & READ_SAMPLES) {
+	/* Read samples */
+	if (fseek(fp, (off_t)h.samples_offset, 0 /* SEEK_SET */) != 0) {
+	    scf_deallocate(scf);
+	    return NULL;
+	}
+
+	if ( 2.9 > scf_version ) {
+
+	    if (h.sample_size == 1) {
+		err= read_scf_samples1(fp, scf->samples.samples1, h.samples);
+	    }
+	    else {
+		err= read_scf_samples2(fp, scf->samples.samples2, h.samples);
+	    }
+	}
+	else {
+
+	    if (h.sample_size == 1) {
+		err= read_scf_samples31(fp, scf->samples.samples1, h.samples);
+	    } 
+	    else {
+		err= read_scf_samples32(fp, scf->samples.samples2, h.samples);
+	    }
+	}
+	if (-1 == err) {
+	    scf_deallocate(scf);
+	    return NULL;
+	}
+    }
+
+    if (sections & READ_BASES) {
+	/* Read bases */
+	if (fseek(fp, (off_t)h.bases_offset, 0 /* SEEK_SET */) != 0) {
+	    scf_deallocate(scf);
+	    return NULL;
+	}
+
+	if ( 2.9 > scf_version ) {
+
+	    if (-1 == read_scf_bases(fp, scf->bases, h.bases)) {
+		scf_deallocate(scf);
+		return NULL;
+	    }
+	}
+	else {
+	
+	    if (-1 == read_scf_bases3(fp, scf->bases, h.bases)) {
+		scf_deallocate(scf);
+		return NULL;
+	    }
+	}
+    }
+
+    if (sections & READ_COMMENTS) {
+	/* Read comments */
+	if (scf->comments) {
+	    if (fseek(fp,(off_t)(h.comments_offset), 0) != 0
+		|| -1 == read_scf_comment(fp, scf->comments,
+					  h.comments_size)) {
+		/*
+		 * Was: "scf_deallocate(scf); return NULL;".
+		 * We now simply clear the comments and gracefully continue.
+		 */
+		fprintf(stderr, "Warning: SCF file had invalid comment field\n");
+		xfree(scf->comments);
+		scf->comments = NULL;
+	    } else {
+		scf->comments[h.comments_size] = '\0';
+	    }
+	}
+    }
+
+    /* Read private data */
+    if (h.private_size) {
+	if (-1 == fseek(fp, (off_t)(h.private_offset), 0) ||
+	    h.private_size != fread(scf->private_data, 1, h.private_size, fp)){
+	    scf_deallocate(scf);
+	    return NULL;
+	}
+    }
+
+    return scf;
+}
+
+/*
+ * Read the SCF format sequence with name `fn' into a 'scf' structure.
+ * A NULL result indicates failure.
+ */
+Scf *read_scf(char *fn) {
+    Scf *scf;
+
+    FILE *fp;
+
+    /* Open fn for reading in binary mode */
+
+    if (NULL == (fp = fopen_compressed(fn, NULL)))
+	return NULL;
+
+    scf = fread_scf(fp);
+    fclose(fp);
+
+    return scf;
+}