diff srf2fastq/io_lib-1.12.2/io_lib/srf.h @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/srf2fastq/io_lib-1.12.2/io_lib/srf.h	Tue Jun 07 17:48:05 2011 -0400
@@ -0,0 +1,190 @@
+#ifndef _SRF_H_
+#define _SRF_H_
+
+#include "io_lib/hash_table.h"
+#include "io_lib/ztr.h"
+#include "io_lib/mFILE.h"
+
+#define SRF_MAGIC		"SSRF"	/* presumably the file signature string - confirm in srf.c */
+#define SRF_VERSION             "1.3"	/* version string; where it is written/checked is not visible in this header */
+/* SRFB_*: one single-character code per kind of SRF block (presumably the values held in the block_type fields - confirm) */
+#define SRFB_CONTAINER 		'S'
+#define SRFB_XML		'X'
+#define SRFB_TRACE_HEADER	'H'
+#define SRFB_TRACE_BODY		'R'
+#define SRFB_INDEX		'I'
+
+/* Lack of index => 8 zero bytes at end of file to indicate zero length */
+#define SRFB_NULL_INDEX		'\0'
+
+/*--- Public structures */
+
+/* Container header - several per file. Passed to srf_construct_cont_hdr(), srf_read_cont_hdr() and srf_write_cont_hdr(). */
+typedef struct {
+    int block_type;			/* presumably SRFB_CONTAINER - confirm in srf.c */
+    char version[256];			/* version string, fixed 256-byte buffer; presumably SRF_VERSION - confirm */
+    char container_type;		/* single-character code; its valid values are not defined in this header */
+    char base_caller[256];		/* cf. the "bc" argument of srf_construct_cont_hdr() - TODO confirm */
+    char base_caller_version[256];	/* cf. the "bc_version" argument of srf_construct_cont_hdr() - TODO confirm */
+} srf_cont_hdr_t;
+
+/* Trace header - several per container. Passed to srf_construct_trace_hdr(), srf_read_trace_hdr() and srf_write_trace_hdr(). */
+typedef struct {
+    int block_type; 			/* presumably SRFB_TRACE_HEADER - confirm in srf.c */
+    char read_prefix_type;		/* single-character code; its valid values are not defined in this header */
+    char id_prefix[256];		/* cf. the "prefix" argument of srf_construct_trace_hdr() - TODO confirm */
+    uint32_t trace_hdr_size;		/* presumably the byte length of trace_hdr - TODO confirm */
+    unsigned char *trace_hdr;		/* NOTE(review): ownership of this buffer is not visible here; see srf_destroy_trace_hdr() */
+} srf_trace_hdr_t;
+
+/* Trace body - several per trace header. Passed to srf_construct_trace_body(), srf_read_trace_body() and srf_write_trace_body(). */
+typedef struct {
+    int block_type;			/* presumably SRFB_TRACE_BODY - confirm in srf.c */
+    int read_id_length;			/* presumably the number of bytes used in read_id - TODO confirm */
+    char read_id[256];			/* cf. the "suffix" argument of srf_construct_trace_body() - TODO confirm */
+    unsigned char flags;		/* presumably a combination of the SRF_READ_FLAG_*_MASK bits - confirm */
+    uint32_t trace_size;		/* presumably the byte length of trace - TODO confirm */
+    unsigned char *trace;		/* NOTE(review): ownership of this buffer is not visible here; see srf_destroy_trace_body() */
+} srf_trace_body_t;
+
+/* XML - NCBI TraceInfo data block. Passed to srf_read_xml() and srf_write_xml(). */
+typedef struct {
+    uint32_t xml_len;	/* presumably the byte length of xml - TODO confirm */
+    char *xml;		/* NOTE(review): whether this is NUL-terminated and who frees it is not visible in this header */
+} srf_xml_t;
+
+#define SRF_READ_FLAG_BAD_MASK       (1<<0)	/* bit 0 (0x01) */
+#define SRF_READ_FLAG_WITHDRAWN_MASK (1<<1)	/* bit 1 (0x02) */
+#define SRF_READ_FLAG_USER_MASK      (7<<5)	/* bits 5-7 (0xe0) */
+
+/* Indexing: index header structure. Passed to srf_read_index_hdr() and srf_write_index_hdr(). */
+typedef struct {
+    char     magic[4];			/* 4 bytes: no room for a NUL after a 4-character value such as SRF_INDEX_MAGIC */
+    char     version[4];		/* 4 bytes: no room for a NUL after a 4-character value such as SRF_INDEX_VERSION */
+    uint64_t size;			/* presumably the index size in bytes - TODO confirm */
+    uint32_t n_container;		/* presumably the number of container headers indexed - TODO confirm */
+    uint32_t n_data_block_hdr;		/* presumably the number of trace (data block) headers indexed - TODO confirm */
+    uint64_t n_buckets;			/* presumably the hash bucket count - TODO confirm */
+    int8_t   index_type;		/* valid values are not defined in this header */
+    int8_t   dbh_pos_stored_sep;	/* cf. srf_index_t.dbh_pos_stored_sep and the "dbh_sep" argument of srf_index_create() */
+    char     dbh_file[256];		/* fixed 256-byte buffer; presumably a file name - confirm */
+    char     cont_file[256];		/* fixed 256-byte buffer; presumably a file name - confirm */
+    int      index_hdr_sz; /* size of the above data on disk */
+} srf_index_hdr_t;
+
+/* In-memory index itself */
+#define SRF_INDEX_NAME_BLOCK_SIZE 10000000	/* presumably the allocation size of one srf_name_block_t.names buffer - confirm in srf.c */
+
+typedef struct {
+  size_t  used;		/* presumably bytes of names already in use - TODO confirm */
+  size_t  space;	/* presumably bytes allocated for names - TODO confirm */
+  char   *names;	/* name storage; its internal layout is not visible in this header */
+} srf_name_block_t;
+
+typedef struct {	/* index object: returned by srf_index_create(), taken by the other srf_index_*() calls */
+    char ch_file[PATH_MAX+1];	/* cf. the "ch_file" argument of srf_index_create(); NOTE(review): PATH_MAX is not defined by a direct include here */
+    char th_file[PATH_MAX+1];	/* cf. the "th_file" argument of srf_index_create() */
+    Array ch_pos;		/* presumably the positions given to srf_index_add_cont_hdr() - TODO confirm */
+    Array th_pos;		/* presumably the positions given to srf_index_add_trace_hdr() - TODO confirm */
+    Array name_blocks;		/* presumably an array of srf_name_block_t - TODO confirm */
+    int dbh_pos_stored_sep;	/* cf. the "dbh_sep" argument of srf_index_create() */
+    HashTable *db_hash;		/* presumably keyed by trace name, see srf_index_add_trace_body() - TODO confirm */
+} srf_index_t;
+
+/* Master SRF object: returned by srf_create()/srf_open() and taken as first argument by most srf_*() calls */
+typedef struct {
+    FILE *fp;	/* stdio stream; cf. the "fp" argument of srf_create() */
+
+    /* Cached copies of each of the most recent chunk types loaded */
+    srf_cont_hdr_t    ch;	/* container header */
+    srf_trace_hdr_t   th;	/* trace header */
+    srf_trace_body_t  tb;	/* trace body */
+    srf_xml_t         xml;	/* XML block */
+    srf_index_hdr_t   hdr;	/* index header */
+
+    /* Private: cached data for use by srf_next_ztr */
+    ztr_t *ztr;
+    mFILE *mf;
+    long mf_pos, mf_end;	/* presumably offsets within mf - TODO confirm in srf.c */
+} srf_t;
+
+#define SRF_INDEX_MAGIC    "Ihsh"	/* 4 characters; presumably the value of srf_index_hdr_t.magic - confirm */
+#define SRF_INDEX_VERSION  "1.01"	/* 4 characters; presumably the value of srf_index_hdr_t.version - confirm */
+
+
+/*--- Initialisation: srf_create() and srf_open() return an srf_t pointer; NOTE(review): failure value not visible in this header */
+srf_t *srf_create(FILE *fp);
+srf_t *srf_open(char *fn, char *mode);	/* fn/mode: presumably as for fopen() - confirm in srf.c */
+void srf_destroy(srf_t *srf, int auto_close);	/* auto_close: presumably whether srf->fp is closed too - confirm */
+
+/*--- Base type I/O methods; NOTE(review): the meaning of the int return values is not visible in this header */
+
+int srf_write_pstring(srf_t *srf, char *str);	/* "pstring": presumably a length-prefixed string - confirm in srf.c */
+int srf_write_pstringb(srf_t *srf, char *str, int length);	/* as above, but with an explicit length argument */
+int srf_read_pstring(srf_t *srf, char *str);	/* no buffer size is passed: the caller must supply a large enough str */
+
+int srf_read_uint32(srf_t *srf, uint32_t *val);	/* value is delivered through *val */
+int srf_write_uint32(srf_t *srf, uint32_t val);
+
+int srf_read_uint64(srf_t *srf, uint64_t *val);	/* value is delivered through *val */
+int srf_write_uint64(srf_t *srf, uint64_t val);
+
+
+/*--- Mid level I/O - srf block: container header construct / destroy / read / write */
+srf_cont_hdr_t *srf_construct_cont_hdr(srf_cont_hdr_t *ch,
+				       char *bc,	/* presumably copied to ch->base_caller - confirm */
+				       char *bc_version);	/* presumably copied to ch->base_caller_version - confirm */
+void srf_destroy_cont_hdr(srf_cont_hdr_t *ch);	/* NOTE(review): whether ch itself is freed is not visible in this header */
+int srf_read_cont_hdr(srf_t *srf, srf_cont_hdr_t *ch);
+int srf_write_cont_hdr(srf_t *srf, srf_cont_hdr_t *ch);
+
+int srf_read_xml(srf_t *srf, srf_xml_t *xml);	/* NOTE(review): who allocates/frees xml->xml is not visible in this header */
+int srf_write_xml(srf_t *srf, srf_xml_t *xml);	/* presumably writes xml->xml_len bytes from xml->xml - confirm */
+
+srf_trace_hdr_t *srf_construct_trace_hdr(srf_trace_hdr_t *th,
+					 char *prefix,	/* presumably stored in th->id_prefix - confirm */
+					 unsigned char *header,	/* presumably becomes th->trace_hdr; copy vs. borrow not visible here */
+					 uint32_t header_sz);	/* presumably the byte length of header - confirm */
+void srf_destroy_trace_hdr(srf_trace_hdr_t *th);	/* NOTE(review): whether th and th->trace_hdr are freed is not visible here */
+int srf_read_trace_hdr(srf_t *srf, srf_trace_hdr_t *th);
+int srf_write_trace_hdr(srf_t *srf, srf_trace_hdr_t *th);
+
+srf_trace_body_t *srf_construct_trace_body(srf_trace_body_t *th,	/* NOTE(review): named "th" although it is a trace body */
+					   char *suffix,	/* presumably stored in read_id - confirm */
+					   int suffix_len,	/* presumably stored in read_id_length - confirm */
+					   unsigned char *body,	/* presumably becomes the trace pointer; copy vs. borrow not visible here */
+					   uint32_t body_size,	/* presumably the byte length of body - confirm */
+					   unsigned char flags);	/* presumably SRF_READ_FLAG_*_MASK bits - confirm */
+void srf_destroy_trace_body(srf_trace_body_t *th);
+int srf_write_trace_body(srf_t *srf, srf_trace_body_t *th);
+int srf_read_trace_body(srf_t *srf, srf_trace_body_t *th, int no_trace);	/* no_trace: presumably skips loading the trace data - confirm */
+
+int srf_read_index_hdr(srf_t *srf, srf_index_hdr_t *hdr, int no_seek);	/* no_seek: presumably read at the current file position - confirm */
+int srf_write_index_hdr(srf_t *srf, srf_index_hdr_t *hdr);
+srf_index_t *srf_index_create(char *ch_file, char *th_file, int dbh_sep);	/* cf. the ch_file, th_file and dbh_pos_stored_sep fields of srf_index_t */
+void srf_index_destroy(srf_index_t *idx);
+void srf_index_stats(srf_index_t *idx, FILE *fp);	/* fp: presumably the stream the statistics are printed to - confirm */
+int srf_index_add_cont_hdr(srf_index_t *idx, uint64_t pos);	/* pos: presumably a file offset - confirm */
+int srf_index_add_trace_hdr(srf_index_t *idx, uint64_t pos);	/* pos: presumably a file offset - confirm */
+int srf_index_add_trace_body(srf_index_t *idx, char *name, uint64_t pos);	/* presumably records name -> pos - confirm */
+int srf_index_write(srf_t *srf, srf_index_t *idx);
+
+/*--- Higher level I/O functions; NOTE(review): end-of-file/error return values are not visible in this header */
+mFILE *srf_next_trace(srf_t *srf, char *name);	/* name: presumably receives the read name; no buffer size is passed */
+ztr_t *srf_next_ztr_flags(srf_t *srf, char *name, int filter_mask, int *flags);	/* filter_mask: presumably SRF_READ_FLAG_*_MASK bits - confirm */
+ztr_t *srf_next_ztr(srf_t *srf, char *name, int filter_mask);
+
+ztr_t *partial_decode_ztr(srf_t *srf, mFILE *mf, ztr_t *z);
+ztr_t *ztr_dup(ztr_t *src);	/* NOTE(review): ownership of the returned ztr_t is not visible in this header */
+
+int srf_next_block_type(srf_t *srf); /* peek ahead */
+int srf_next_block_details(srf_t *srf, uint64_t *pos, char *name);	/* pos/name: presumably outputs - confirm */
+
+int srf_find_trace(srf_t *srf, char *trace,
+		   uint64_t *cpos, uint64_t *hpos, uint64_t *dpos);	/* presumably output positions of container, header and data - confirm */
+
+int construct_trace_name(char *fmt,
+			 unsigned char *suffix, int suffix_len,
+			 char *name, int name_len);	/* name/name_len: presumably the output buffer and its size - confirm */
+
+#endif /* _SRF_H_ */