view srf2fastq/io_lib-1.12.2/io_lib/write_scf.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
line wrap: on
line source

/*
 * Copyright (c) Medical Research Council 1994. All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * this copyright and notice appears in all copies.
 *
 * This file was written by James Bonfield, Simon Dear, Rodger Staden,
 * as part of the Staden Package at the MRC Laboratory of Molecular
 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
 *
 * MRC disclaims all warranties with regard to this software.
 */

/* 
    Title:       write_scf.c

    Purpose:	 Output of Standard Chromatogram Format sequences
    Last update: August 18 1994

    Change log:
    4 Feb 1992, Now draft proposal version 2
    23 Nov 92,  SCF 2.0 + LaDeana's changes
    11 Aug 93, Version 2.01 containing confidence values
    18 Aug 1994  Renamed from  writeSCF.c; now purely SCF IO (no Seq structs)

    Oct 95 major rewrite to make files more easily compressed.
    gzip now gets files to around 40% of original
    Version raised to 3.00
     * We store in order:
     *     Header
     *     Samples
     *     Bases
     *     Comments
     *     Private

     Two main types of change: 
     1: write data in lane order instead of all lanes together
     eg write Sample values for A, then Sample values for C, etc. 

     2: where appropriate write delta delta values instead of complete ones.
     ie write the differences in the differences between successive values

*/


static int scf_version = 3;

/* ---- Imports ---- */


#include <ctype.h>
#include <stdio.h>    /* IMPORT: fopen, fclose, fseek, ftell, fgetc,
		                 EOF */
#include <string.h>
#include "io_lib/scf.h"      /* IMPORT: scf structures */
#include "io_lib/mach-io.h"  /* IMPORT: be_write_int_1, be_write_int_2, be_write_int_4 */
#include "io_lib/xalloc.h"

#include "io_lib/stdio_hack.h"

/* ---- Exports ---- */


int write_scf_header(FILE *fp, Header *h)
{
    int i;

    if (be_write_int_4(fp,&h->magic_number)==False)     return -1;
    if (be_write_int_4(fp,&h->samples)==False)          return -1;
    if (be_write_int_4(fp,&h->samples_offset)==False)   return -1;
    if (be_write_int_4(fp,&h->bases)==False)            return -1;
    if (be_write_int_4(fp,&h->bases_left_clip)==False)  return -1;
    if (be_write_int_4(fp,&h->bases_right_clip)==False) return -1;
    if (be_write_int_4(fp,&h->bases_offset)==False)     return -1;
    if (be_write_int_4(fp,&h->comments_size)==False)    return -1;
    if (be_write_int_4(fp,&h->comments_offset)==False)  return -1;
    if (fwrite(h->version,sizeof(h->version),1,fp)!=1)  return -1;
    if (be_write_int_4(fp,&h->sample_size)==False)      return -1;
    if (be_write_int_4(fp,&h->code_set)==False)         return -1;
    if (be_write_int_4(fp,&h->private_size)==False)     return -1;
    if (be_write_int_4(fp,&h->private_offset)==False)   return -1;
    for (i=0;i<18;i++)
	if (be_write_int_4(fp,&h->spare[i])==False)     return -1;

    return 0;
}


int write_scf_sample1(FILE *fp, Samples1 *s)
{
    uint_1 buf[4];

    buf[0] = s->sample_A;
    buf[1] = s->sample_C;
    buf[2] = s->sample_G;
    buf[3] = s->sample_T;
    if (4 != fwrite(buf, 1, 4, fp)) return -1;

    return 0;
}


int write_scf_sample2(FILE *fp, Samples2 *s)
{
    uint_2 buf[4];

    buf[0] = be_int2(s->sample_A);
    buf[1] = be_int2(s->sample_C);
    buf[2] = be_int2(s->sample_G);
    buf[3] = be_int2(s->sample_T);
    if (4 != fwrite(buf, 2, 4, fp)) return -1;

    return 0;
}


int write_scf_samples1(FILE *fp, Samples1 *s, size_t num_samples) {
    size_t i;

    for (i = 0; i < num_samples; i++) {
	if (-1 == write_scf_sample1(fp, &(s[i])))
	    return -1;
    }

    return 0;
}


int write_scf_samples2(FILE *fp, Samples2 *s, size_t num_samples) {
    size_t i;

    for (i = 0; i < num_samples; i++) {
	if (-1 == write_scf_sample2(fp, &(s[i])))
	    return -1;
    }

    return 0;
}


int write_scf_samples31(FILE *fp, Samples1 *s, size_t num_samples) {
    size_t i;
    int1 *samples_out;

    if (!num_samples)
	return 0;

    if ( ! (samples_out = (int1 *)xmalloc(num_samples * 
					    sizeof(int1)))) {
	return -1;
    }

    for (i = 0; i < num_samples; i++) {
	samples_out[i] = (&s[i])->sample_A;
    }
    scf_delta_samples1 ( samples_out, num_samples, 1);
    if (num_samples != fwrite(samples_out, 1, num_samples, fp)) {
	xfree(samples_out);
	return -1;
    }

    for (i = 0; i < num_samples; i++) {
	samples_out[i] = (&s[i])->sample_C;
    }
    scf_delta_samples1 ( samples_out, num_samples, 1);
    if (num_samples != fwrite(samples_out, 1, num_samples, fp)) {
	xfree(samples_out);
	return -1;
    }

    for (i = 0; i < num_samples; i++) {
	samples_out[i] = (&s[i])->sample_G;
    }
    scf_delta_samples1 ( samples_out, num_samples, 1);
    if (num_samples != fwrite(samples_out, 1, num_samples, fp)) {
	xfree(samples_out);
	return -1;
    }

    for (i = 0; i < num_samples; i++) {
	samples_out[i] = (&s[i])->sample_T;
    }
    scf_delta_samples1 ( samples_out, num_samples, 1);
    if (num_samples != fwrite(samples_out, 1, num_samples, fp)) {
	xfree(samples_out);
	return -1;
    }

    xfree(samples_out);
    return 0;
}

int write_scf_samples32(FILE *fp, Samples2 *s, size_t num_samples) {
    size_t i;
    uint2 *samples_out;

    if (!num_samples)
	return 0;

    if ( ! (samples_out = (uint2 *)xmalloc(num_samples * sizeof(uint2)))) {
	return -1;
    }


    for (i = 0; i < num_samples; i++) {
	samples_out[i] = (&s[i])->sample_A;
    }
    scf_delta_samples2 ( samples_out, num_samples, 1);
#ifdef SP_LITTLE_ENDIAN
    for (i = 0; i < num_samples; i++) {
	samples_out[i] = be_int2(samples_out[i]);
    }
#endif
    if (num_samples != fwrite(samples_out, 2, num_samples, fp)) return -1;


    for (i = 0; i < num_samples; i++) {
	samples_out[i] = (&s[i])->sample_C;
    }
    scf_delta_samples2 ( samples_out, num_samples, 1);
#ifdef SP_LITTLE_ENDIAN
    for (i = 0; i < num_samples; i++) {
	samples_out[i] = be_int2(samples_out[i]);
    }
#endif
    if (num_samples != fwrite(samples_out, 2, num_samples, fp)) return -1;


    for (i = 0; i < num_samples; i++) {
	samples_out[i] = (&s[i])->sample_G;
    }
    scf_delta_samples2 ( samples_out, num_samples, 1);
#ifdef SP_LITTLE_ENDIAN
    for (i = 0; i < num_samples; i++) {
	samples_out[i] = be_int2(samples_out[i]);
    }
#endif
    if (num_samples != fwrite(samples_out, 2, num_samples, fp)) return -1;


    for (i = 0; i < num_samples; i++) {
	samples_out[i] = (&s[i])->sample_T;
    }
    scf_delta_samples2 ( samples_out, num_samples, 1);
#ifdef SP_LITTLE_ENDIAN
    for (i = 0; i < num_samples; i++) {
	samples_out[i] = be_int2(samples_out[i]);
    }
#endif
    if (num_samples != fwrite(samples_out, 2, num_samples, fp)) return -1;


    xfree(samples_out);
    return 0;
}


int write_scf_base(FILE *fp, Bases *b)
{
    uint_1 buf[12];

    ((uint_4 *)buf)[0] = be_int4(b->peak_index);
    buf[4] = b->prob_A;
    buf[5] = b->prob_C;
    buf[6] = b->prob_G;
    buf[7] = b->prob_T;
    buf[8] = b->base;
    buf[9] = b->spare[0];
    buf[10] = b->spare[1];
    buf[11] = b->spare[2];

    if (12 != fwrite(buf, 1, 12, fp)) return -1;

    return 0;
}


int write_scf_bases(FILE *fp, Bases *b, size_t num_bases)
{
    size_t i;

    for (i = 0; i < num_bases; i++) {
	if (-1 == write_scf_base(fp, &(b[i])))
	    return -1;
    }

    return 0;
}

int write_scf_bases3(FILE *fp, Bases *b, size_t num_bases)
{
    size_t i;
    uint_4 *buf4;
    uint_1 *buf1;

    if (NULL == (buf4 = (uint_4 *)xmalloc(1 + 4 * num_bases)))
	return -1;

    if (NULL == (buf1 = (uint_1 *)xmalloc(1 + 8 * num_bases))) {
	xfree(buf4);
	return -1;
    }

    for (i = 0; i < num_bases; i++) {
	buf4[i] = be_int4((&b[i])->peak_index);
    }
    fwrite(buf4, 4, num_bases, fp);
    
    for (i=0; i < num_bases; i++) {
	buf1[i            ] = (&b[i])->prob_A;
	buf1[i+  num_bases] = (&b[i])->prob_C;
	buf1[i+2*num_bases] = (&b[i])->prob_G;
	buf1[i+3*num_bases] = (&b[i])->prob_T;
	buf1[i+4*num_bases] = (&b[i])->base;
	buf1[i+5*num_bases] = (&b[i])->spare[0];
	buf1[i+6*num_bases] = (&b[i])->spare[1];
	buf1[i+7*num_bases] = (&b[i])->spare[2];
    }
    if (8 * num_bases != (fwrite(buf1, 1, 8 * num_bases, fp))) {
	xfree(buf1);
	xfree(buf4);
	return -1;
    }

    xfree(buf1);
    xfree(buf4);
    return 0;
}


int write_scf_comment(FILE *fp, Comments *c, size_t s)
{
    if (fwrite(c, 1, s, fp) != s) return -1;

    return 0;
}



/*
 * Request which (major) version of scf to use when writing.
 * Defaults to the latest. Currently suitable fields are
 * 2 and 3.
 *
 * Returns 0 for success, -1 for failure.
 */
int set_scf_version(int version) {
    if (version != 2 && version != 3)
	return -1;

    scf_version = version;
    return 0;
}

/*
 * Write Seq out as a .scf file to the 'fp' FILE *
 */
int fwrite_scf(Scf *scf, FILE *fp) {
    uint_4 size;
    int err;

    /*
     * Init header offsets.
     *
     * We store in order:
     *     Header
     *     Samples
     *     Bases
     *     Comments
     *     Private
     */
    scf->header.samples_offset = (uint_4)sizeof(Header);
    size = scf->header.samples * (scf->header.sample_size == 1 ?
				  sizeof(Samples1) : sizeof(Samples2));
    scf->header.bases_offset = (uint_4)(scf->header.samples_offset +
					 size);
    size = scf->header.bases * sizeof(Bases);
    scf->header.comments_offset = (uint_4)(scf->header.bases_offset + size);

    size = scf->header.comments_size;
    scf->header.private_offset = (uint_4)(scf->header.comments_offset + size);

    /* Init a few other things, such as the magic number */
    scf->header.magic_number = SCF_MAGIC;

    if (scf_version == 3) {
	memcpy(scf->header.version, scf_version_float2str(SCF_VERSION), 4);
    } else {
	memcpy(scf->header.version, scf_version_float2str(SCF_VERSION_OLD), 4);
    }

    /* Write header */
    if (write_scf_header(fp, &scf->header) == -1)
	return -1;

    if (scf_version == 3) {
	/* Write Samples */
	if (scf->header.sample_size == 1)
	    err = write_scf_samples31(fp, scf->samples.samples1,
				      scf->header.samples);
	else
	    err = write_scf_samples32(fp, scf->samples.samples2,
				      scf->header.samples);
	if (-1 == err)
	    return -1;
	
	/* Write Bases */
	if (-1 == write_scf_bases3(fp, scf->bases, scf->header.bases))
	    return -1;

    } else {
	/* Write Samples */
	if (scf->header.sample_size == 1)
	    err = write_scf_samples1(fp, scf->samples.samples1,
				     scf->header.samples);
	else
	    err = write_scf_samples2(fp, scf->samples.samples2,
				     scf->header.samples);
	if (-1 == err)
	    return -1;
	
	/* Write Bases */
	if (-1 == write_scf_bases(fp, scf->bases, scf->header.bases))
	    return -1;
    }

    /* Write Comments */
    if (-1 == write_scf_comment(fp, scf->comments,
				scf->header.comments_size))
	return -1;

    /* Write private data */
    if (scf->header.private_size) {
	if (scf->header.private_size  != fwrite(scf->private_data, 1,
						scf->header.private_size, fp))
	    return -1;
    }

    return 0;
}

/*
 * Write Seq out as a .scf file to file 'fn'.
 */
int write_scf(Scf *scf, char *fn)
{
    FILE *fp;

    /* Open for for write in binary mode */
    if ((fp = fopen(fn,"wb")) == NULL) 
	return -1;

    if (fwrite_scf(scf, fp)) {
	fclose(fp);
	return -1;
    }

    fclose(fp);
    return 0;
}