diff srf2fastq/io_lib-1.12.2/io_lib/write_scf.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/srf2fastq/io_lib-1.12.2/io_lib/write_scf.c	Tue Jun 07 17:48:05 2011 -0400
@@ -0,0 +1,461 @@
+/*
+ * Copyright (c) Medical Research Council 1994. All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * this copyright and notice appears in all copies.
+ *
+ * This file was written by James Bonfield, Simon Dear, Rodger Staden,
+ * as part of the Staden Package at the MRC Laboratory of Molecular
+ * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
+ *
+ * MRC disclaims all warranties with regard to this software.
+ */
+
+/* 
+    Title:       write_scf.c
+
+    Purpose:	 Output of Standard Chromatogram Format sequences
+    Last update: August 18 1994
+
+    Change log:
+    4 Feb 1992, Now draft proposal version 2
+    23 Nov 92,  SCF 2.0 + LaDeana's changes
+    11 Aug 93, Version 2.01 containing confidence values
+    18 Aug 1994  Renamed from  writeSCF.c; now purely SCF IO (no Seq structs)
+
+    Oct 95 major rewrite to make files more easily compressed.
+    gzip now gets files to around 40% of original
+    Version raised to 3.00
+     * We store in order:
+     *     Header
+     *     Samples
+     *     Bases
+     *     Comments
+     *     Private
+
+     Two main types of change: 
+     1: write data in lane order instead of all lanes together
+     eg write Sample values for A, then Sample values for C, etc. 
+
+     2: where appropriate write delta delta values instead of complete ones.
+     ie write the differences in the differences between successive values
+
+*/
+
+
+/*
+ * Major SCF version used when writing; 3 unless changed through
+ * set_scf_version(). fwrite_scf() consults it to pick the output layout.
+ */
+static int scf_version = 3;
+
+/* ---- Imports ---- */
+
+
+#include <ctype.h>
+#include <stdio.h>    /* IMPORT: fopen, fclose, fseek, ftell, fgetc,
+		                 EOF */
+#include <string.h>
+#include "io_lib/scf.h"      /* IMPORT: scf structures */
+#include "io_lib/mach-io.h"  /* IMPORT: be_write_int_1, be_write_int_2, be_write_int_4 */
+#include "io_lib/xalloc.h"
+
+#include "io_lib/stdio_hack.h"
+
+/* ---- Exports ---- */
+
+
+/*
+ * Writes the header 'h' to 'fp' field by field, in the fixed order listed
+ * below. The integer fields go through be_write_int_4(); the version
+ * string is written as raw bytes with fwrite().
+ *
+ * Returns 0 for success, -1 as soon as any single field fails to write.
+ */
+int write_scf_header(FILE *fp, Header *h)
+{
+    int spare_idx;
+
+    /* Integer fields that precede the version string. */
+    if (be_write_int_4(fp, &h->magic_number)     == False ||
+	be_write_int_4(fp, &h->samples)          == False ||
+	be_write_int_4(fp, &h->samples_offset)   == False ||
+	be_write_int_4(fp, &h->bases)            == False ||
+	be_write_int_4(fp, &h->bases_left_clip)  == False ||
+	be_write_int_4(fp, &h->bases_right_clip) == False ||
+	be_write_int_4(fp, &h->bases_offset)     == False ||
+	be_write_int_4(fp, &h->comments_size)    == False ||
+	be_write_int_4(fp, &h->comments_offset)  == False)
+	return -1;
+
+    /* The version is stored as-is, with no integer conversion. */
+    if (1 != fwrite(h->version, sizeof(h->version), 1, fp))
+	return -1;
+
+    /* Integer fields that follow the version string. */
+    if (be_write_int_4(fp, &h->sample_size)    == False ||
+	be_write_int_4(fp, &h->code_set)       == False ||
+	be_write_int_4(fp, &h->private_size)   == False ||
+	be_write_int_4(fp, &h->private_offset) == False)
+	return -1;
+
+    /* Finally the 18 spare words. */
+    for (spare_idx = 0; spare_idx < 18; spare_idx++) {
+	if (be_write_int_4(fp, &h->spare[spare_idx]) == False)
+	    return -1;
+    }
+
+    return 0;
+}
+
+
+/*
+ * Writes one 1-byte-per-lane sample point to 'fp' as four consecutive
+ * bytes in the order A, C, G, T.
+ *
+ * Returns 0 for success, -1 if fewer than four bytes were written.
+ */
+int write_scf_sample1(FILE *fp, Samples1 *s)
+{
+    uint_1 lanes[4];
+
+    lanes[0] = s->sample_A;
+    lanes[1] = s->sample_C;
+    lanes[2] = s->sample_G;
+    lanes[3] = s->sample_T;
+
+    return (fwrite(lanes, 1, 4, fp) == 4) ? 0 : -1;
+}
+
+
+/*
+ * Writes one 2-byte-per-lane sample point to 'fp' as four consecutive
+ * 2-byte values in the order A, C, G, T. Each value is passed through
+ * be_int2() before being written.
+ *
+ * Returns 0 for success, -1 if fewer than four items were written.
+ */
+int write_scf_sample2(FILE *fp, Samples2 *s)
+{
+    uint_2 lanes[4];
+
+    lanes[0] = be_int2(s->sample_A);
+    lanes[1] = be_int2(s->sample_C);
+    lanes[2] = be_int2(s->sample_G);
+    lanes[3] = be_int2(s->sample_T);
+
+    return (fwrite(lanes, 2, 4, fp) == 4) ? 0 : -1;
+}
+
+
+/*
+ * Writes 'num_samples' 1-byte sample points from the array 's' to 'fp',
+ * one point at a time via write_scf_sample1().
+ *
+ * Returns 0 for success, -1 at the first point that fails to write.
+ */
+int write_scf_samples1(FILE *fp, Samples1 *s, size_t num_samples) {
+    size_t remaining;
+
+    for (remaining = num_samples; remaining > 0; remaining--, s++) {
+	if (write_scf_sample1(fp, s) == -1)
+	    return -1;
+    }
+
+    return 0;
+}
+
+
+/*
+ * Writes 'num_samples' 2-byte sample points from the array 's' to 'fp',
+ * one point at a time via write_scf_sample2().
+ *
+ * Returns 0 for success, -1 at the first point that fails to write.
+ */
+int write_scf_samples2(FILE *fp, Samples2 *s, size_t num_samples) {
+    size_t remaining;
+
+    for (remaining = num_samples; remaining > 0; remaining--, s++) {
+	if (write_scf_sample2(fp, s) == -1)
+	    return -1;
+    }
+
+    return 0;
+}
+
+
+/*
+ * Writes 'num_samples' 1-byte samples in lane order: every A value, then
+ * every C, then G, then T. Each lane is gathered into a scratch buffer,
+ * passed through scf_delta_samples1() (presumably the "delta delta"
+ * encoding described in the file header - confirm against its definition)
+ * and written with a single fwrite().
+ *
+ * Returns 0 for success (including when num_samples is 0), -1 on
+ * allocation or write failure. The scratch buffer is freed on every path.
+ */
+int write_scf_samples31(FILE *fp, Samples1 *s, size_t num_samples) {
+    int1 *lane_buf;
+    size_t k;
+    int lane;
+
+    if (num_samples == 0)
+	return 0;
+
+    lane_buf = (int1 *)xmalloc(num_samples * sizeof(int1));
+    if (!lane_buf)
+	return -1;
+
+    /* lane 0..3 selects A, C, G, T in that order. */
+    for (lane = 0; lane < 4; lane++) {
+	for (k = 0; k < num_samples; k++) {
+	    switch (lane) {
+	    case 0:  lane_buf[k] = s[k].sample_A; break;
+	    case 1:  lane_buf[k] = s[k].sample_C; break;
+	    case 2:  lane_buf[k] = s[k].sample_G; break;
+	    default: lane_buf[k] = s[k].sample_T; break;
+	    }
+	}
+
+	scf_delta_samples1(lane_buf, num_samples, 1);
+
+	if (fwrite(lane_buf, 1, num_samples, fp) != num_samples) {
+	    xfree(lane_buf);
+	    return -1;
+	}
+    }
+
+    xfree(lane_buf);
+    return 0;
+}
+
+/*
+ * Writes 'num_samples' 2-byte samples in lane order: every A value, then
+ * every C, then G, then T. Each lane is gathered into a scratch buffer,
+ * passed through scf_delta_samples2() (presumably the "delta delta"
+ * encoding described in the file header - confirm against its definition),
+ * converted with be_int2() when SP_LITTLE_ENDIAN is defined, and written
+ * with a single fwrite().
+ *
+ * Returns 0 for success (including when num_samples is 0), -1 on
+ * allocation or write failure. The scratch buffer is released on every
+ * path, including write failures.
+ */
+int write_scf_samples32(FILE *fp, Samples2 *s, size_t num_samples) {
+    size_t i;
+    int lane;
+    uint2 *samples_out;
+
+    if (!num_samples)
+	return 0;
+
+    if ( ! (samples_out = (uint2 *)xmalloc(num_samples * sizeof(uint2)))) {
+	return -1;
+    }
+
+    /* lane 0..3 selects A, C, G, T in that order. */
+    for (lane = 0; lane < 4; lane++) {
+	for (i = 0; i < num_samples; i++) {
+	    switch (lane) {
+	    case 0:  samples_out[i] = s[i].sample_A; break;
+	    case 1:  samples_out[i] = s[i].sample_C; break;
+	    case 2:  samples_out[i] = s[i].sample_G; break;
+	    default: samples_out[i] = s[i].sample_T; break;
+	    }
+	}
+
+	scf_delta_samples2(samples_out, num_samples, 1);
+#ifdef SP_LITTLE_ENDIAN
+	for (i = 0; i < num_samples; i++) {
+	    samples_out[i] = be_int2(samples_out[i]);
+	}
+#endif
+
+	/* Free the scratch buffer before bailing out so it does not leak. */
+	if (num_samples != fwrite(samples_out, 2, num_samples, fp)) {
+	    xfree(samples_out);
+	    return -1;
+	}
+    }
+
+    xfree(samples_out);
+    return 0;
+}
+
+
+/*
+ * Writes a single base record 'b' to 'fp' as 12 bytes: the peak index
+ * (4 bytes, after be_int4()), the four probabilities A, C, G, T, the base
+ * character and the three spare bytes.
+ *
+ * Returns 0 for success, -1 if fewer than 12 bytes were written.
+ */
+int write_scf_base(FILE *fp, Bases *b)
+{
+    uint_1 buf[12];
+    uint_4 peak_index;
+
+    /*
+     * Copy the converted index in byte-wise. Storing through a cast
+     * (uint_4 *) pointer into a byte array breaks the aliasing rules and
+     * may be a misaligned access on some platforms.
+     */
+    peak_index = be_int4(b->peak_index);
+    memcpy(buf, &peak_index, 4);
+
+    buf[4] = b->prob_A;
+    buf[5] = b->prob_C;
+    buf[6] = b->prob_G;
+    buf[7] = b->prob_T;
+    buf[8] = b->base;
+    buf[9] = b->spare[0];
+    buf[10] = b->spare[1];
+    buf[11] = b->spare[2];
+
+    if (12 != fwrite(buf, 1, 12, fp)) return -1;
+
+    return 0;
+}
+
+
+/*
+ * Writes 'num_bases' base records from the array 'b' to 'fp', one record
+ * at a time via write_scf_base().
+ *
+ * Returns 0 for success, -1 at the first record that fails to write.
+ */
+int write_scf_bases(FILE *fp, Bases *b, size_t num_bases)
+{
+    size_t remaining;
+
+    for (remaining = num_bases; remaining > 0; remaining--, b++) {
+	if (write_scf_base(fp, b) == -1)
+	    return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Writes 'num_bases' base records in the version 3 layout: first all peak
+ * indices (4 bytes each, after be_int4()), then eight byte arrays of
+ * num_bases entries each, in the order prob_A, prob_C, prob_G, prob_T,
+ * base, spare[0], spare[1], spare[2].
+ *
+ * Returns 0 for success, -1 on allocation failure or if either write
+ * comes up short. Both scratch buffers are freed on every path.
+ */
+int write_scf_bases3(FILE *fp, Bases *b, size_t num_bases)
+{
+    size_t i;
+    uint_4 *buf4;
+    uint_1 *buf1;
+    int status = -1;
+
+    /* The extra byte keeps the request non-zero when num_bases is 0. */
+    if (NULL == (buf4 = (uint_4 *)xmalloc(1 + 4 * num_bases)))
+	return -1;
+
+    if (NULL == (buf1 = (uint_1 *)xmalloc(1 + 8 * num_bases))) {
+	xfree(buf4);
+	return -1;
+    }
+
+    for (i = 0; i < num_bases; i++) {
+	buf4[i] = be_int4((&b[i])->peak_index);
+    }
+    /* A short write of the peak indices is an error, not to be ignored. */
+    if (num_bases != fwrite(buf4, 4, num_bases, fp))
+	goto cleanup;
+
+    for (i=0; i < num_bases; i++) {
+	buf1[i            ] = (&b[i])->prob_A;
+	buf1[i+  num_bases] = (&b[i])->prob_C;
+	buf1[i+2*num_bases] = (&b[i])->prob_G;
+	buf1[i+3*num_bases] = (&b[i])->prob_T;
+	buf1[i+4*num_bases] = (&b[i])->base;
+	buf1[i+5*num_bases] = (&b[i])->spare[0];
+	buf1[i+6*num_bases] = (&b[i])->spare[1];
+	buf1[i+7*num_bases] = (&b[i])->spare[2];
+    }
+    if (8 * num_bases != (fwrite(buf1, 1, 8 * num_bases, fp)))
+	goto cleanup;
+
+    status = 0;
+
+ cleanup:
+    xfree(buf1);
+    xfree(buf4);
+    return status;
+}
+
+
+/*
+ * Writes 's' bytes of comment data starting at 'c' to 'fp'.
+ *
+ * Returns 0 for success, -1 if fewer than 's' bytes were written.
+ */
+int write_scf_comment(FILE *fp, Comments *c, size_t s)
+{
+    return (fwrite(c, 1, s, fp) == s) ? 0 : -1;
+}
+
+
+
+/*
+ * Select which (major) SCF version subsequent writes will produce.
+ * The accepted values are 2 and 3; any other value is rejected and
+ * leaves the current setting untouched.
+ *
+ * Returns 0 for success, -1 for failure.
+ */
+int set_scf_version(int version) {
+    switch (version) {
+    case 2:
+    case 3:
+	scf_version = version;
+	return 0;
+
+    default:
+	return -1;
+    }
+}
+
+/*
+ * Write 'scf' in SCF format to the already-open stream 'fp'.
+ *
+ * The header inside 'scf' is updated in place before it is written: the
+ * section offsets, the magic number and the version string are all
+ * recomputed here. The layout chosen (version 3 or the older one) follows
+ * the file-level scf_version setting.
+ *
+ * Returns 0 for success, -1 as soon as any section fails to write.
+ */
+int fwrite_scf(Scf *scf, FILE *fp) {
+    uint_4 section_bytes;
+    int rc;
+
+    /*
+     * Lay the sections out back to back, in this order:
+     *     Header, Samples, Bases, Comments, Private
+     */
+    scf->header.samples_offset = (uint_4)sizeof(Header);
+
+    if (scf->header.sample_size == 1)
+	section_bytes = scf->header.samples * sizeof(Samples1);
+    else
+	section_bytes = scf->header.samples * sizeof(Samples2);
+    scf->header.bases_offset =
+	(uint_4)(scf->header.samples_offset + section_bytes);
+
+    section_bytes = scf->header.bases * sizeof(Bases);
+    scf->header.comments_offset =
+	(uint_4)(scf->header.bases_offset + section_bytes);
+
+    section_bytes = scf->header.comments_size;
+    scf->header.private_offset =
+	(uint_4)(scf->header.comments_offset + section_bytes);
+
+    /* Stamp the magic number and the version string for the chosen format */
+    scf->header.magic_number = SCF_MAGIC;
+
+    if (scf_version != 3) {
+	memcpy(scf->header.version, scf_version_float2str(SCF_VERSION_OLD), 4);
+    } else {
+	memcpy(scf->header.version, scf_version_float2str(SCF_VERSION), 4);
+    }
+
+    /* Header */
+    if (-1 == write_scf_header(fp, &scf->header))
+	return -1;
+
+    if (scf_version == 3) {
+	/* Samples, lane by lane */
+	if (scf->header.sample_size == 1) {
+	    rc = write_scf_samples31(fp, scf->samples.samples1,
+				     scf->header.samples);
+	} else {
+	    rc = write_scf_samples32(fp, scf->samples.samples2,
+				     scf->header.samples);
+	}
+	if (rc == -1)
+	    return -1;
+
+	/* Bases, field by field */
+	if (write_scf_bases3(fp, scf->bases, scf->header.bases) == -1)
+	    return -1;
+    } else {
+	/* Samples, one point at a time */
+	if (scf->header.sample_size == 1) {
+	    rc = write_scf_samples1(fp, scf->samples.samples1,
+				    scf->header.samples);
+	} else {
+	    rc = write_scf_samples2(fp, scf->samples.samples2,
+				    scf->header.samples);
+	}
+	if (rc == -1)
+	    return -1;
+
+	/* Bases, one record at a time */
+	if (write_scf_bases(fp, scf->bases, scf->header.bases) == -1)
+	    return -1;
+    }
+
+    /* Comments */
+    if (write_scf_comment(fp, scf->comments,
+			  scf->header.comments_size) == -1)
+	return -1;
+
+    /* Private data, only when there is some */
+    if (scf->header.private_size &&
+	scf->header.private_size != fwrite(scf->private_data, 1,
+					   scf->header.private_size, fp))
+	return -1;
+
+    return 0;
+}
+
+/*
+ * Write 'scf' out as a .scf file named 'fn'. The file is opened in binary
+ * write mode, filled by fwrite_scf() and closed again.
+ *
+ * Returns 0 for success, -1 if the file cannot be opened, if writing
+ * fails, or if closing the file reports an error.
+ */
+int write_scf(Scf *scf, char *fn)
+{
+    FILE *fp;
+
+    /* Open for write in binary mode */
+    if ((fp = fopen(fn,"wb")) == NULL) 
+	return -1;
+
+    if (fwrite_scf(scf, fp)) {
+	fclose(fp);
+	return -1;
+    }
+
+    /*
+     * Buffered data may only reach the file when it is closed, so a
+     * failing close means the output cannot be trusted.
+     */
+    if (fclose(fp) != 0)
+	return -1;
+
+    return 0;
+}