view srf2fastq/io_lib-1.12.2/io_lib/read_scf.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
line wrap: on
line source

/*
 * Copyright (c) Medical Research Council 1994. All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * this copyright and notice appears in all copies.
 *
 * This file was written by James Bonfield, Simon Dear, Rodger Staden,
 * as part of the Staden Package at the MRC Laboratory of Molecular
 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
 *
 * MRC disclaims all warranties with regard to this software.
 */

/* 
  Title:       read_scf.c
  
  Purpose:	 read IO of Standard Chromatogram Format sequences
  Last update:   August 18 1994
  
  Change log:
  4 Feb 1992,  Now draft proposal version 2
  20 Feb 1992, Grab info from comment lines
  19 Aug 1992, If SCF file has clip information, don't clip automatically
  10 Nov 1992  SCF comments now stored in seq data structure
  18 Aug 1994  Renamed from  ReadIOSCF.c; now purely SCF IO (no Seq structs)

*/

/* ---- Imports ---- */

#include <ctype.h>
#include <stdio.h>    /* IMPORT: fopen, fclose, fseek, ftell, fgetc,
			 EOF */
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#include "io_lib/mach-io.h"
#include "io_lib/xalloc.h"
#include "io_lib/compress.h"
#include "io_lib/Read.h"

#include "io_lib/stdio_hack.h"
#include "io_lib/scf.h"      /* SCF structures */


/* SunOS4 has it's definitions in unistd, which we won't include for compat. */
#ifndef SEEK_SET
#define SEEK_SET 0
#define SEEK_CUR 1
#define SEEK_END 2
#endif

/* ---- Exported functions ---- */

int read_scf_header(FILE *fp, Header *h)
{
    int i;

    if (be_read_int_4(fp,&h->magic_number)==False)        return -1;

    if (h->magic_number != SCF_MAGIC)
	return -1;

    if (be_read_int_4(fp,&h->samples)==False)             return -1;
    if (be_read_int_4(fp,&h->samples_offset)==False)      return -1;
    if (be_read_int_4(fp,&h->bases)==False)               return -1;
    if (be_read_int_4(fp,&h->bases_left_clip)==False)     return -1;
    if (be_read_int_4(fp,&h->bases_right_clip)==False)    return -1;
    if (be_read_int_4(fp,&h->bases_offset)==False)        return -1;
    if (be_read_int_4(fp,&h->comments_size)==False)       return -1;
    if (be_read_int_4(fp,&h->comments_offset)==False)     return -1;
    if (fread(&h->version[0],sizeof(h->version),1,fp)!=1) return -1;
    if (be_read_int_4(fp,&h->sample_size)==False)         return -1;
    if (be_read_int_4(fp,&h->code_set)==False)            return -1;
    if (be_read_int_4(fp,&h->private_size)==False)        return -1;
    if (be_read_int_4(fp,&h->private_offset)==False)      return -1;
    for (i=0;i<18;i++)
	if (be_read_int_4(fp,&h->spare[i])==False)        return -1;
    
    return 0;
}


int read_scf_sample1(FILE *fp, Samples1 *s)
{
    uint_1 buf[4];

    if (4 != fread(buf, 1, 4, fp)) return -1;
    s->sample_A = buf[0];
    s->sample_C = buf[1];
    s->sample_G = buf[2];
    s->sample_T = buf[3];

/*
    if (1 != fread(s, 4, 1, fp)) return -1;
*/

    return 0;
}


int read_scf_sample2(FILE *fp, Samples2 *s)
{
    uint_2 buf[4];

    if (4 != fread(buf, 2, 4, fp)) return -1;
    s->sample_A = be_int2(buf[0]);
    s->sample_C = be_int2(buf[1]);
    s->sample_G = be_int2(buf[2]);
    s->sample_T = be_int2(buf[3]);
    
    return 0;
}

int read_scf_samples1(FILE *fp, Samples1 *s, size_t num_samples) {
    size_t i;

    for (i = 0; i < num_samples; i++) {
	if (-1 == read_scf_sample1(fp, &(s[i])))
	    return -1;
    }

    return 0;
}


int read_scf_samples2(FILE *fp, Samples2 *s, size_t num_samples) {
    size_t i;

    for (i = 0; i < num_samples; i++) {
	if (-1 == read_scf_sample2(fp, &(s[i])))
	    return -1;
    }

    return 0;
}


int read_scf_samples32(FILE *fp, Samples2 *s, size_t num_samples) {
    size_t i;
    uint2 *samples_out;

    /* version to read delta delta data in 2 bytes */

    if ( ! (samples_out = (uint2 *)xmalloc((num_samples+1) * 
					    sizeof(uint2)))) {
	return -1;
    }


    if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1;
#ifdef SP_LITTLE_ENDIAN
    for (i = 0; i < num_samples; i++) {
	samples_out[i] = be_int2(samples_out[i]);
    }
#endif
    scf_delta_samples2 ( samples_out, num_samples, 0);
    for (i = 0; i < num_samples; i++) {
	(&s[i])->sample_A = samples_out[i];
    }

    if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1;
#ifdef SP_LITTLE_ENDIAN
    for (i = 0; i < num_samples; i++) {
	samples_out[i] = be_int2(samples_out[i]);
    }
#endif
    scf_delta_samples2 ( samples_out, num_samples, 0);
    for (i = 0; i < num_samples; i++) {
	(&s[i])->sample_C = samples_out[i];
    }

    if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1;
#ifdef SP_LITTLE_ENDIAN
    for (i = 0; i < num_samples; i++) {
	samples_out[i] = be_int2(samples_out[i]);
    }
#endif
    scf_delta_samples2 ( samples_out, num_samples, 0);
    for (i = 0; i < num_samples; i++) {
	(&s[i])->sample_G = samples_out[i];
    }

    if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1;
#ifdef SP_LITTLE_ENDIAN
    for (i = 0; i < num_samples; i++) {
	samples_out[i] = be_int2(samples_out[i]);
    }
#endif
    scf_delta_samples2 ( samples_out, num_samples, 0);
    for (i = 0; i < num_samples; i++) {
	(&s[i])->sample_T = samples_out[i];
    }
    xfree(samples_out);
    return 0;
}

int read_scf_samples31(FILE *fp, Samples1 *s, size_t num_samples) {
    size_t i;
    int1 *samples_out;

    /* version to read delta delta data in 1 byte */

    if ( ! (samples_out = (int1 *)xmalloc((num_samples+1) * 
					    sizeof(int1)))) {
	return -1;
    }

    if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1;
    scf_delta_samples1 ( samples_out, num_samples, 0);
    for (i = 0; i < num_samples; i++) {
	(&s[i])->sample_A = samples_out[i];
    }

    if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1;
    scf_delta_samples1 ( samples_out, num_samples, 0);
    for (i = 0; i < num_samples; i++) {
	(&s[i])->sample_C = samples_out[i];
    }

    if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1;
    scf_delta_samples1 ( samples_out, num_samples, 0);
    for (i = 0; i < num_samples; i++) {
	(&s[i])->sample_G = samples_out[i];
    }

    if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1;
    scf_delta_samples1 ( samples_out, num_samples, 0);
    for (i = 0; i < num_samples; i++) {
	(&s[i])->sample_T = samples_out[i];
    }

    xfree(samples_out);
    return 0;
}

int read_scf_base(FILE *fp, Bases *b)
{
    uint_1 buf[12];

    if (1 != fread(buf, 12, 1, fp)) return -1;
    b->peak_index = be_int4(((uint_4 *)buf)[0]);
    b->prob_A = buf[4];
    b->prob_C = buf[5];
    b->prob_G = buf[6];
    b->prob_T = buf[7];
    b->base   = buf[8];
    b->spare[0] = buf[9];
    b->spare[1] = buf[10];
    b->spare[2] = buf[11];

    return 0;
}


int read_scf_bases(FILE *fp, Bases *b, size_t num_bases) {
    size_t i;

    for (i = 0; i < num_bases; i++) {
	if (-1 == read_scf_base(fp, &(b[i])))
	    return -1;
    }

    return 0;
}

int read_scf_bases3(FILE *fp, Bases *b, size_t num_bases)
{
    size_t i;
    uint_4 *buf4;
    uint_1 *buf1;

    if (NULL == (buf4 = (uint_4 *)xmalloc(1 + 4 * num_bases)))
	return -1;

    if (NULL == (buf1 = (uint_1 *)xmalloc(1 + 8 * num_bases))) {
	xfree(buf4);
	return -1;
    }

    if (num_bases != fread(buf4, 4, num_bases, fp)) return -1;
    for (i=0; i < num_bases; i++)
	(&b[i])->peak_index = be_int4(buf4[i]);

    if (8 * num_bases != fread(buf1, 1, 8 * num_bases, fp)) return -1;

    for (i=0; i < num_bases; i++) {
	(&b[i])->prob_A   = buf1[i];
	(&b[i])->prob_C   = buf1[i+num_bases];
	(&b[i])->prob_G   = buf1[i+2*num_bases];
	(&b[i])->prob_T   = buf1[i+3*num_bases];
	(&b[i])->base     = buf1[i+4*num_bases];
	(&b[i])->spare[0] = buf1[i+5*num_bases];
	(&b[i])->spare[1] = buf1[i+6*num_bases];
	(&b[i])->spare[2] = buf1[i+7*num_bases];
    }

    xfree(buf4);
    xfree(buf1);

    return 0;
}



int read_scf_comment(FILE *fp, Comments *c, size_t s)
{
    if (fread(c, 1, s, fp) != s) return -1;

    return 0;
}


/*
 * Read the SCF format sequence from FILE *fp into a 'scf' structure.
 * A NULL result indicates failure.
 */
Scf *fread_scf(FILE *fp) {
    Scf *scf;
    Header h;
    int err;
    float scf_version;
    int sections = read_sections(0);

    /* Read header */
    if (read_scf_header(fp, &h) == -1) {
	return NULL;
    }

    /* Allocate memory */
    if (NULL == (scf = scf_allocate(h.samples, h.sample_size,
				    h.bases, h.comments_size,
				    h.private_size))) 
	return NULL;

    /* fake things for older style SCF -- SD */
    if (h.sample_size != 1 && h.sample_size != 2) h.sample_size = 1;

    scf_version = scf_version_str2float(h.version);

    memcpy(&scf->header, &h, sizeof(Header));

    if (sections & READ_SAMPLES) {
	/* Read samples */
	if (fseek(fp, (off_t)h.samples_offset, 0 /* SEEK_SET */) != 0) {
	    scf_deallocate(scf);
	    return NULL;
	}

	if ( 2.9 > scf_version ) {

	    if (h.sample_size == 1) {
		err= read_scf_samples1(fp, scf->samples.samples1, h.samples);
	    }
	    else {
		err= read_scf_samples2(fp, scf->samples.samples2, h.samples);
	    }
	}
	else {

	    if (h.sample_size == 1) {
		err= read_scf_samples31(fp, scf->samples.samples1, h.samples);
	    } 
	    else {
		err= read_scf_samples32(fp, scf->samples.samples2, h.samples);
	    }
	}
	if (-1 == err) {
	    scf_deallocate(scf);
	    return NULL;
	}
    }

    if (sections & READ_BASES) {
	/* Read bases */
	if (fseek(fp, (off_t)h.bases_offset, 0 /* SEEK_SET */) != 0) {
	    scf_deallocate(scf);
	    return NULL;
	}

	if ( 2.9 > scf_version ) {

	    if (-1 == read_scf_bases(fp, scf->bases, h.bases)) {
		scf_deallocate(scf);
		return NULL;
	    }
	}
	else {
	
	    if (-1 == read_scf_bases3(fp, scf->bases, h.bases)) {
		scf_deallocate(scf);
		return NULL;
	    }
	}
    }

    if (sections & READ_COMMENTS) {
	/* Read comments */
	if (scf->comments) {
	    if (fseek(fp,(off_t)(h.comments_offset), 0) != 0
		|| -1 == read_scf_comment(fp, scf->comments,
					  h.comments_size)) {
		/*
		 * Was: "scf_deallocate(scf); return NULL;".
		 * We now simply clear the comments and gracefully continue.
		 */
		fprintf(stderr, "Warning: SCF file had invalid comment field\n");
		xfree(scf->comments);
		scf->comments = NULL;
	    } else {
		scf->comments[h.comments_size] = '\0';
	    }
	}
    }

    /* Read private data */
    if (h.private_size) {
	if (-1 == fseek(fp, (off_t)(h.private_offset), 0) ||
	    h.private_size != fread(scf->private_data, 1, h.private_size, fp)){
	    scf_deallocate(scf);
	    return NULL;
	}
    }

    return scf;
}

/*
 * Read the SCF format sequence with name `fn' into a 'scf' structure.
 * A NULL result indicates failure.
 */
Scf *read_scf(char *fn) {
    Scf *scf;

    FILE *fp;

    /* Open fn for reading in binary mode */

    if (NULL == (fp = fopen_compressed(fn, NULL)))
	return NULL;

    scf = fread_scf(fp);
    fclose(fp);

    return scf;
}