Mercurial > repos > dawe > srf2fastq
view srf2fastq/io_lib-1.12.2/progs/srf_filter.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
line wrap: on
line source
/* * ====================================================================== * This software has been created by Genome Research Limited (GRL). * * GRL hereby grants permission to use, copy, modify and distribute * this software and its documentation for non-commercial purposes * without fee at the user's own risk on the basis set out below. * * GRL neither undertakes nor accepts any duty whether contractual or * otherwise in connection with the software, its use or the use of * any derivative, and makes no representations or warranties, express * or implied, concerning the software, its suitability, fitness for * a particular purpose or non-infringement. * * In no event shall the authors of the software or GRL be responsible * or liable for any loss or damage whatsoever arising in any way * directly or indirectly out of the use of this software or its * derivatives, even if advised of the possibility of such damage. * * Our software can be freely distributed under the conditions set out * above, and must contain this copyright notice. * ====================================================================== */ /* * This performs a linear (non-indexed) search for a trace in an SRF archive. * * It's not intended as a suitable production program or as a library of code * to use, but as a test and benchmark statistic. */ #include <string.h> #include <stdio.h> #include <unistd.h> #include <ctype.h> #include <sys/stat.h> #include <sys/types.h> #include <io_lib/Read.h> #include <io_lib/misc.h> #include <io_lib/ztr.h> #include <io_lib/srf.h> #include <io_lib/hash_table.h> #define CR 13 /* Decimal code of Carriage Return char */ #define LF 10 /* Decimal code of Line Feed char */ #define EOF_MARKER 26 /* Decimal code of DOS end-of-file marker */ #define MAX_REC_LEN 1024 /* Maximum size of input buffer */ #define CHUNK_BASE (1 << 0) #define CHUNK_CNF1 (1 << 1) #define CHUNK_CNF4 (1 << 2) #define CHUNK_SAMP (1 << 3) #define CHUNK_SMP4 (1 << 4) #define CHUNK_ALL (CHUNK_BASE | CHUNK_CNF1 | CHUNK_CNF4 | CHUNK_SAMP | CHUNK_SMP4) #define NCHUNKS 6 #define CHUNK_BASE_STR "BASE" #define CHUNK_CNF1_STR "CNF1" #define CHUNK_CNF4_STR "CNF4" #define CHUNK_SAMP_STR "SAMP" #define CHUNK_SMP4_STR "SMP4" #define CHUNK_ALL_STR "ALL" #define TYPE_PROC (1 << 0) #define TYPE_SLXI (1 << 1) #define TYPE_SLXN (1 << 2) #define TYPE_0FAM (1 << 3) #define TYPE_1CY3 (1 << 4) #define TYPE_2TXR (1 << 5) #define TYPE_3CY5 (1 << 6) #define TYPE_ALL (TYPE_PROC | TYPE_SLXI | TYPE_SLXN | TYPE_0FAM | TYPE_1CY3 | TYPE_2TXR | TYPE_3CY5) #define NTYPES 8 #define TYPE_PROC_STR "PROC" #define TYPE_SLXI_STR "SLXI" #define TYPE_SLXN_STR "SLXN" #define TYPE_0FAM_STR "0FAM" #define TYPE_1CY3_STR "1CY3" #define TYPE_2TXR_STR "2TXR" #define TYPE_3CY5_STR "3CY5" #define TYPE_ALL_STR "ALL" /* ------------------------------------------------------------------------ */ /* * All the reads and prefixes will be used to filter reads in an srf archive. * For reads the match with the reads in the archive must be exact. Prefixes * need only match to the beginning of the read, so they are a way to filter * several reads that, for example, belong to the same lane and tile. For * example, if the reads are <Center>:<Run>:<Lane>:<Tile>:<X>:<Y>, then a * prefix can progress from center all the way to y coordinate in terms of * read specificity. */ typedef struct { int prefixes_size; int reads_size; char** prefixes; /* Prefixes to filter on. */ HashTable *reads_hash; /* Reads to filter on. */ } read_filter_t; /* ------------------------------------------------------------------------ */ /* * Print usage message to stderr and exit with the given \"code\". */ void usage(int code) { fprintf(stderr, "Usage: srf_filter [-c chunk_types] [-f read_filter] [-C] [-o] [-v] input(s) output\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, " -c chunk_types\n"); fprintf(stderr, " Chunk types to output given as a comma delimited list of types.\n"); fprintf(stderr, " The following types are allowed: \"ALL\", \"BASE\", \"CNF1\", \"CNF4\"\n"); fprintf(stderr, " \"SAMP\", \"SMP4\".\n"); fprintf(stderr, " The default is \"ALL\".\n"); fprintf(stderr, "\n -m mdata_types\n"); fprintf(stderr, " SAMP/SMP4 mdata types to output given as a comma delimited list of types.\n"); fprintf(stderr, " The following types are allowed: \"ALL\", \"PROC\", \"SLXI\", \"SLXN\"\n"); fprintf(stderr, " \"0FAM\", \"1CY3\", \"2TXR\", \"3CY5\".\n"); fprintf(stderr, " The default is \"ALL\".\n"); fprintf(stderr, "\n -f read_filter\n"); fprintf(stderr, " The filter to apply to reads in the archive. If reads match the\n"); fprintf(stderr, " filter they are dumped.\n"); fprintf(stderr, " The filter takes the form of <name>=<value>, where <name> can be\n"); fprintf(stderr, " \"read\", \"prefix\", \"file\".\n"); fprintf(stderr, " If the <name> is \"read\" the value is represents the name of a\n"); fprintf(stderr, " read to dump. Only the matching read will be dumped.\n"); fprintf(stderr, " If the <name> is \"prefix\" the value is represents the prefix of\n"); fprintf(stderr, " the name of the reads to dump. Only the matching reads will be\n"); fprintf(stderr, " dumped.\n"); fprintf(stderr, " If the <name> is \"file\" the value is a file name where any\n"); fprintf(stderr, " number of \"read\" and \"prefix\" name value pairs can be included,\n"); fprintf(stderr, " one per line.\n"); fprintf(stderr, " The default is no filter, which means all reads are dumped.\n"); fprintf(stderr, "\n -b exclude bad reads using readsFlags bitmask in data block header.\n"); fprintf(stderr, "\n -2 cyc use this option to add a Illumina-style REGN chunk.\n"); fprintf(stderr, "\n -v Print verbose messages.\n"); fprintf(stderr, "\n"); exit(code); } /* * Reads the lines from the fiven \"input\" file and creates the reads and prefixes * for the read filter. The \"read_filter\" given is allocated. * * Returns 0 on success. Errors cause the usage message and exit. */ int read_filter_from_file(FILE *input, read_filter_t *read_filter) { int isNewline; /* Boolean indicating we've read a CR or LF */ long lFileLen; /* Length of file */ long lIndex; /* Index into cThisLine array */ long lLineCount; /* Current line number */ long lLineLen; /* Current line length */ long lStartPos; /* Offset of start of current line */ long lTotalChars; /* Total characters read */ char cThisLine[MAX_REC_LEN]; /* Contents of current line */ char *cFile; /* Dynamically allocated buffer (entire file) */ char *cThisPtr; /* Pointer to current position in cFile */ char *filter_type; char *prefix; char *read; fseek(input, 0L, SEEK_END); /* Position to end of file */ lFileLen = ftell(input); /* Get file length */ rewind(input); /* Back to start of file */ cFile = calloc(lFileLen + 1, sizeof(char)); if(cFile == NULL ) { fprintf(stderr, "\nInsufficient memory to read file.\n"); return 0; } fread(cFile, lFileLen, 1, input); /* Read the entire file into cFile */ lLineCount = 0L; lTotalChars = 0L; cThisPtr = cFile; /* Point to beginning of array */ while (*cThisPtr) /* Read until reaching null char */ { lIndex = 0L; /* Reset counters and flags */ isNewline = 0; lStartPos = lTotalChars; while (*cThisPtr) /* Read until reaching null char */ { if (!isNewline) /* Haven't read a CR or LF yet */ { if (*cThisPtr == CR || *cThisPtr == LF) /* This char IS a CR or LF */ isNewline = 1; /* Set flag */ } else if (*cThisPtr != CR && *cThisPtr != LF) /* Already found CR or LF */ break; /* Done with line */ /* Don't copy LS or CR */ if (*cThisPtr != CR && *cThisPtr != LF) { cThisLine[lIndex++] = *cThisPtr++; /* Add char to output and increment */ ++lTotalChars; } else { cThisPtr++; } } /* end while (*cThisPtr) */ cThisLine[lIndex] = '\0'; /* Terminate the string */ ++lLineCount; /* Increment the line counter */ lLineLen = strlen(cThisLine); /* Get length of line */ /* Find the one and only = in the string. */ if(strchr(cThisLine,'=') != NULL && (strchr(cThisLine,'=') == strrchr(cThisLine,'='))) { filter_type = strtok (cThisLine, "="); } else { fprintf(stderr, "Baddly formatted read filter \"%s\". Expected an \"=\" character in middle of filter.\n", cThisLine); usage(1); } if (!strcmp(filter_type, "prefix")) { prefix = strtok (NULL, "="); if(prefix == NULL) { fprintf(stderr, "Bad prefix \"%s\" in read filter \"%s\".\n", prefix, cThisLine); usage(1); } else { ++(read_filter->prefixes_size); read_filter->prefixes = (char**) realloc (read_filter->prefixes, read_filter->prefixes_size * sizeof(char *)); read_filter->prefixes[read_filter->prefixes_size - 1] = (char*) calloc (strlen(prefix) + 1,sizeof(char)); strcpy(read_filter->prefixes[read_filter->prefixes_size - 1], prefix); } } else if (!strcmp(filter_type, "read")) { read = strtok (NULL, "="); if(read == NULL) { fprintf(stderr, "Bad read \"%s\" in read filter \"%s\".\n", read, cThisLine); usage(1); } else { char *file; HashItem *hi; if( NULL != (file = strchr(read, ' '))) { *file = '\0'; file++; printf("read=%s file=%s\n", read, file); } else { printf("read=%s\n", read); } if (NULL == (hi = (HashTableSearch(read_filter->reads_hash, read, strlen(read))))) { HashData hd; hd.i = read_filter->reads_size; if (NULL == (hi = HashTableAdd(read_filter->reads_hash, read, strlen(read), hd, NULL))) { fprintf(stderr, "\nUnable to process read filter.\n"); return 0; } ++(read_filter->reads_size); } } } else { fprintf(stderr, "Unrecognized filter type \"%s\" given as part of read filter \"%s\". The valid filter types are \"%s\".\n", filter_type, cThisLine, "prefix or read"); usage(1); } } free(cFile); return 0; } /* * Parse the given \"filter_value\" string for the filter value. * * Returns a read filter allocated on the heap which needs to be freed by the * calling function. Errors usually cause the usage message and exit. */ read_filter_t *get_read_filter(char *filter_value) { char *filter_type = NULL; char *file_name = NULL; FILE *fp = NULL; char *prefix = NULL; char *read = NULL; /* Create read filter. */ read_filter_t *read_filter = (read_filter_t *)calloc(1, sizeof(read_filter_t)); if(read_filter == NULL) { return NULL; } read_filter->prefixes_size = 0; read_filter->reads_size = 0; if (NULL == (read_filter->reads_hash = HashTableCreate(0, HASH_DYNAMIC_SIZE|HASH_FUNC_JENKINS3))) { return NULL; } /* Find the one and only = in the string. */ if( strchr(filter_value,'=') != NULL && (strchr(filter_value,'=') == strrchr(filter_value,'=')) ) { filter_type = strtok (filter_value,"="); } else { fprintf(stderr, "Baddly formatted read filter \"%s\". Expected an \"=\" character in middle of filter.\n", filter_value); usage(1); } /* Check the string before the = is a valid filter type. */ if(!strcmp("file", filter_type)) { /* Read the file. */ file_name = strtok (NULL, "="); if(file_name == NULL) { fprintf(stderr, "Bad file name \"%s\" in read filter \"%s\".\n", file_name, filter_value); usage(1); } fp = fopen(file_name, "r"); if(fp == NULL) { fprintf(stderr, "Bad file name \"%s\" in read filter \"%s\".\n", file_name, filter_value); usage(1); } /* Read line by line. */ if(read_filter_from_file(fp, read_filter)) { fprintf(stderr, "Bad contents of file %s.\n", file_name); usage(1); } } else if (!strcmp("prefix", filter_type)) { prefix = strtok (NULL, "="); if(prefix == NULL) { fprintf(stderr, "Bad prefix \"%s\" in read filter \"%s\".\n", prefix, filter_value); usage(1); } else { ++(read_filter->prefixes_size); read_filter->prefixes = (char**) malloc (read_filter->prefixes_size * sizeof(char *)); read_filter->prefixes[read_filter->prefixes_size - 1] = (char*) calloc (strlen(prefix) + 1,sizeof(char)); strcpy(read_filter->prefixes[read_filter->prefixes_size - 1], prefix); } } else if (!strcmp("read", filter_type)) { read = strtok (NULL, "="); if(read == NULL) { fprintf(stderr, "Bad read \"%s\" in read filter \"%s\".\n", read, filter_value); usage(1); } else { HashItem *hi; if (NULL == (hi = (HashTableSearch(read_filter->reads_hash, read, strlen(read))))) { HashData hd; hd.i = 0; if (NULL == (hi = HashTableAdd(read_filter->reads_hash, read, strlen(read), hd, NULL))) { return NULL; } ++(read_filter->reads_size); } } } else { fprintf(stderr, "Unrecognized filter type \"%s\" given as part of read filter \"%s\". The valid filter types are \"%s\".\n", filter_type, filter_value, "prefix, read, or file"); usage(1); } return read_filter; } /* * Parse the comma delimited list of chunk types and put them in the single character \"mode\". * * Returns 0 on success. */ int get_chunk_types(char *arg, char *mode) { int num_allowed_types = NCHUNKS; char *allowed_str_types[] = {CHUNK_BASE_STR,CHUNK_CNF1_STR,CHUNK_CNF4_STR,CHUNK_SAMP_STR,CHUNK_SMP4_STR,CHUNK_ALL_STR}; char allowed_types[] = {CHUNK_BASE,CHUNK_CNF1,CHUNK_CNF4,CHUNK_SAMP,CHUNK_SMP4,CHUNK_ALL}; char *type; int i = 0; type = strtok (arg,","); while(type) { for(i = 0; i < num_allowed_types; i++) { if(!strcmp(type, allowed_str_types[i]) && !(*mode & allowed_types[i])) { *mode += allowed_types[i]; break; } } type = strtok (NULL, ","); } return 0; } /* * Parse the comma delimited list of mdata types and put them in the single character \"mode\". * * Returns 0 on success. */ int get_mdata_types(char *arg, char *mode) { int num_allowed_types = NTYPES; char *allowed_str_types[] = {TYPE_PROC_STR, TYPE_SLXI_STR, TYPE_SLXN_STR, TYPE_0FAM_STR, TYPE_1CY3_STR, TYPE_2TXR_STR, TYPE_3CY5_STR, TYPE_ALL_STR}; char allowed_types[] = {TYPE_PROC, TYPE_SLXI, TYPE_SLXN, TYPE_0FAM, TYPE_1CY3, TYPE_2TXR, TYPE_3CY5, TYPE_ALL}; char *type; int i = 0; type = strtok (arg,","); while(type) { for(i = 0; i < num_allowed_types; i++) { if(!strcmp(type, allowed_str_types[i]) && !(*mode & allowed_types[i])) { *mode += allowed_types[i]; break; } } type = strtok (NULL, ","); } return 0; } /* * Returns 1 is the read \"name\" matches any of the reads or prefixes in the \"read_filter\". */ int check_read_name(read_filter_t *read_filter, char *name) { int i; for(i = 0; i < read_filter->prefixes_size; i++) { if(name == strstr(name, read_filter->prefixes[i])) { return 1; } } if( read_filter->reads_size ){ HashItem *hi; if (NULL != (hi = (HashTableSearch(read_filter->reads_hash, name, strlen(name))))) { return 1; } } return 0; } void dump_read_filter(read_filter_t *read_filter) { int i; printf("Read filter used:\n"); for(i = 0; i < read_filter->prefixes_size; i++) { printf("\tprefix[%d] = %s\n", i, read_filter->prefixes[i]); } if( read_filter->reads_size ){ int ibucket; for (ibucket=0; ibucket<read_filter->reads_hash->nbuckets; ibucket++) { HashItem *hi; for (hi = read_filter->reads_hash->bucket[ibucket]; hi; hi = hi->next) { printf("\tread[%d] = %s\n", hi->data.i, hi->key); } } } } void dump_chunk_mode(char mode) { printf("mode: %d.\n", mode); if(mode & CHUNK_BASE) { printf("BASE chunk required.\n"); } if(mode & CHUNK_CNF1) { printf("CNF1 chunk required.\n"); } if(mode & CHUNK_CNF4) { printf("CNF4 chunk required.\n"); } if(mode & CHUNK_SAMP) { printf("SAMP chunk required.\n"); } if(mode & CHUNK_SMP4) { printf("SMP4 chunk required.\n"); } } void dump_mdata_mode(char mode) { printf("mode: %d.\n", mode); if(mode & TYPE_PROC) { printf("Illumina PROC SAMP/SMP4 chunk required.\n"); } if(mode & TYPE_SLXI) { printf("Illumina SLXI SAMP/SMP4 chunk required.\n"); } if(mode & TYPE_SLXN) { printf("Illumina SLXN SAMP/SMP4 chunk required.\n"); } if(mode & TYPE_0FAM) { printf("Solid 0FAM SAMP/SMP4 chunk required.\n"); } if(mode & TYPE_1CY3) { printf("Solid 1CY3 SAMP/SMP4 chunk required.\n"); } if(mode & TYPE_2TXR) { printf("Solid 2TXR SAMP/SMP4 chunk required.\n"); } if(mode & TYPE_3CY5) { printf("Solid 3CY3 SAMP/SMP4 chunk required.\n"); } } /* * ztr_mwrite_chunk * * Writes a ZTR chunk including chunk header and data * * Arguments: * fp A mFILE pointer * chunk A pointer to the chunk to write * * Returns: * Success: 0 * Failure: -1 */ static int ztr_mwrite_chunk(mFILE *fp, ztr_chunk_t *chunk) { int4 bei4; /* { char str[5]; fprintf(stderr, "Write chunk %.4s %08x length %d\n", ZTR_BE2STR(chunk->type, str), chunk->type, chunk->dlength); } */ /* type */ bei4 = be_int4(chunk->type); if (1 != mfwrite(&bei4, 4, 1, fp)) return -1; /* metadata length */ bei4 = be_int4(chunk->mdlength); if (1 != mfwrite(&bei4, 4, 1, fp)) return -1; /* metadata */ if (chunk->mdlength) if (chunk->mdlength != mfwrite(chunk->mdata, 1, chunk->mdlength, fp)) return -1; /* data length */ bei4 = be_int4(chunk->dlength); if (1 != mfwrite(&bei4, 4, 1, fp)) return -1; /* data */ if (chunk->dlength != mfwrite(chunk->data, 1, chunk->dlength, fp)) return -1; return 0; } /* * Adds a ZTR REGN chunk describing the paired-end structure. Ie which bit * is which. This is a simplified form of the more generic REGN contents. * * Returns 0 for success * -1 for failure */ static int add_readpair_region(unsigned int rev_cycle, mFILE *mf) { char *mdata = malloc(100); unsigned char *data = malloc(5); int mdlen; ztr_chunk_t c; if (!data || !mdata) return -1; data[0] = 0; rev_cycle--; /* we count from 0 */ data[1] = (rev_cycle >> 24) & 0xff; data[2] = (rev_cycle >> 16) & 0xff; data[3] = (rev_cycle >> 8) & 0xff; data[4] = (rev_cycle >> 0) & 0xff; mdlen = sprintf(mdata, "NAME%cforward:P;reverse:P%c",0,0); /* Initialise */ c.type = ZTR_TYPE_REGN; c.data = data; c.dlength = 5; c.mdata = mdata; c.mdlength = mdlen; c.ztr_owns = 1; ztr_mwrite_chunk(mf, &c); xfree(data); xfree(mdata); return 0; } /* * Given the input archive name (input), the output archive name (output), * the chunk types to output (chunk_mode) and some other parameters such as * the read filter generates a filtered srf file. * * Note the generated srf file is NOT indexed * * Returns 0 on success. * 1 on failure */ int srf_filter(char *input, srf_t *out_srf, char chunk_mode, char mdata_mode, int filter_mode, read_filter_t *read_filter, int read_mask, int rev_cycle) { srf_t *in_srf; int output_trace_header; char name[1024]; if (NULL == (in_srf = srf_open(input, "rb"))) { perror(input); return 1; } do { int type; ztr_chunk_t *chunk; switch(type = srf_next_block_type(in_srf)) { case SRFB_CONTAINER: if (0 != srf_read_cont_hdr(in_srf, &in_srf->ch)) { fprintf(stderr, "Error reading container header.\nExiting.\n"); exit(1); } if (0 != srf_write_cont_hdr(out_srf, &in_srf->ch)) { fprintf(stderr, "Error writing container header.\nExiting.\n"); exit(1); } break; case SRFB_XML: if (0 != srf_read_xml(in_srf, &in_srf->xml)) { fprintf(stderr, "Error reading XML.\nExiting.\n"); exit(1); } if (0 != srf_write_xml(out_srf, &in_srf->xml)) { fprintf(stderr, "Error writing XML.\nExiting.\n"); exit(1); } break; case SRFB_TRACE_HEADER: if (0 != srf_read_trace_hdr(in_srf, &in_srf->th)) { fprintf(stderr, "Error reading trace header.\nExiting.\n"); exit(1); } #if 1 if (chunk_mode == CHUNK_ALL && mdata_mode == TYPE_ALL) { if (!rev_cycle) { output_trace_header = 1; break; } } #endif /* Decode ZTR chunks in the header */ if (in_srf->mf) mfdestroy(in_srf->mf); in_srf->mf = mfcreate(NULL, 0); if (in_srf->th.trace_hdr_size) mfwrite(in_srf->th.trace_hdr, 1, in_srf->th.trace_hdr_size, in_srf->mf); if (in_srf->ztr) delete_ztr(in_srf->ztr); mrewind(in_srf->mf); /* create the trace header data */ mFILE *mf = mfcreate(NULL, 0); if (NULL != (in_srf->ztr = partial_decode_ztr(in_srf, in_srf->mf, NULL))) { in_srf->mf_pos = mftell(in_srf->mf); mfseek(in_srf->mf, 0, SEEK_END); in_srf->mf_end = mftell(in_srf->mf); mfseek(in_srf->mf, 0, SEEK_SET); mfwrite(in_srf->mf->data, 1, sizeof(ztr_header_t), mf); mfseek(in_srf->mf, sizeof(ztr_header_t), SEEK_CUR); int pos = mftell(in_srf->mf); while (chunk = ztr_read_chunk_hdr(in_srf->mf)) { char *key = ztr_lookup_mdata_value(in_srf->ztr, chunk, "TYPE"); int flag = 0; /* filter on chunk type */ switch (chunk->type) { case ZTR_TYPE_BASE: if (chunk_mode & CHUNK_BASE) flag = 1; break; case ZTR_TYPE_CNF1: if (chunk_mode & CHUNK_CNF1) flag = 1; break; case ZTR_TYPE_CNF4: if (chunk_mode & CHUNK_CNF4) flag = 1; break; case ZTR_TYPE_REGN: if (!rev_cycle) flag = 1; break; case ZTR_TYPE_SAMP: if (chunk_mode & CHUNK_SAMP) { if (mdata_mode == TYPE_ALL) flag = 1; if ((mdata_mode & TYPE_0FAM) && (key && 0 == strcmp(key, "0FAM"))) flag = 1; if ((mdata_mode & TYPE_1CY3) && (key && 0 == strcmp(key, "1CY3"))) flag = 1; if ((mdata_mode & TYPE_2TXR) && (key && 0 == strcmp(key, "2TXR"))) flag = 1; if ((mdata_mode & TYPE_3CY5) && (key && 0 == strcmp(key, "3CY5"))) flag = 1; } break; case ZTR_TYPE_SMP4: if (chunk_mode & CHUNK_SMP4) { if (mdata_mode == TYPE_ALL) flag = 1; if ((mdata_mode & TYPE_PROC) && (NULL == key || 0 == strcmp(key, "PROC"))) flag = 1; if ((mdata_mode & TYPE_SLXI) && (key && 0 == strcmp(key, "SLXI"))) flag = 1; if ((mdata_mode & TYPE_SLXN) && (key && 0 == strcmp(key, "SLXN"))) flag = 1; } break; default: flag = 1; break; } if (flag) mfwrite(in_srf->mf->data+pos, 1, (4+4+chunk->mdlength+4+chunk->dlength), mf); mfseek(in_srf->mf, chunk->dlength, SEEK_CUR); pos = mftell(in_srf->mf); if (chunk->mdata) xfree(chunk->mdata); xfree(chunk); } if (rev_cycle) { fprintf(stderr, "Adding REGN chunk to trace header\n"); if ( -1 == add_readpair_region(rev_cycle, mf)) { fprintf(stderr, "Failed to add to REGN chunk\n"); exit(1); } } #if 1 if (chunk_mode == CHUNK_ALL && mdata_mode == TYPE_ALL) { mfwrite(in_srf->mf->data+in_srf->mf_pos, 1 , (in_srf->mf_end-in_srf->mf_pos), mf); } #endif } else { /* Maybe not enough to decode or no headerBlob. */ /* So delay until decoding the body. */ in_srf->mf_pos = 0; } mfseek(in_srf->mf, 0, SEEK_END); in_srf->mf_end = mftell(in_srf->mf); /* construct the new trace header */ uint32_t trace_hdr_size = mftell(mf); char *trace_hdr; if (NULL == (trace_hdr = malloc(trace_hdr_size))) { stderr, "Error making trace header.\nExiting.\n"; exit(1); } memcpy(trace_hdr, mf->data, trace_hdr_size); if (out_srf->th.trace_hdr) free(out_srf->th.trace_hdr); srf_construct_trace_hdr(&out_srf->th, in_srf->th.id_prefix, trace_hdr, trace_hdr_size); output_trace_header = 1; mfdestroy(mf); break; case SRFB_TRACE_BODY: { srf_trace_body_t old_tb; ztr_t *ztr_tmp; if (0 != srf_read_trace_body(in_srf, &old_tb, 0)) { fprintf(stderr, "Error reading trace body.\nExiting.\n"); exit(1); } if (-1 == construct_trace_name(in_srf->th.id_prefix, (unsigned char *)old_tb.read_id, old_tb.read_id_length, name, 512)) { fprintf(stderr, "Error constructing trace name.\nExiting.\n"); exit(1); } if (old_tb.flags & read_mask) { if (old_tb.trace_size) free(old_tb.trace); break; } if (filter_mode && !check_read_name(read_filter, name)) { if (old_tb.trace_size) free(old_tb.trace); break; } #if 1 if (chunk_mode == CHUNK_ALL && mdata_mode == TYPE_ALL) { /* output the trace header as required */ if( output_trace_header ) { output_trace_header = 0; if (!rev_cycle ) { if (0 != srf_write_trace_hdr(out_srf, &in_srf->th)) { fprintf(stderr, "Error writing trace header.\nExiting.\n"); exit(1); } }else{ if (0 != srf_write_trace_hdr(out_srf, &out_srf->th)) { fprintf(stderr, "Error writing trace header.\nExiting.\n"); exit(1); } } } if (!rev_cycle || (rev_cycle && in_srf->mf_pos)) { if (0 != srf_write_trace_body(out_srf, &old_tb)) { fprintf(stderr, "Error writing trace body.\nExiting.\n"); exit(1); } } if (old_tb.trace_size) free(old_tb.trace); break; } #endif if (!in_srf->mf) { fprintf(stderr, "Error reading trace body.\nExiting.\n"); exit(1); } mfseek(in_srf->mf, in_srf->mf_end, SEEK_SET); if (old_tb.trace_size) { mfwrite(old_tb.trace, 1, old_tb.trace_size, in_srf->mf); free(old_tb.trace); old_tb.trace = NULL; } mftruncate(in_srf->mf, mftell(in_srf->mf)); mfseek(in_srf->mf, in_srf->mf_pos, SEEK_SET); if (in_srf->ztr) ztr_tmp = ztr_dup(in_srf->ztr); /* inefficient, but simple */ else ztr_tmp = NULL; if ((ztr_tmp = partial_decode_ztr(in_srf, in_srf->mf, ztr_tmp))) { /* create the trace body data */ mFILE *mf = mfcreate(NULL, 0); /* include the ztr header if it wasn't in the trace header block */ if( !in_srf->mf_pos ){ mfseek(in_srf->mf, 0, SEEK_SET); mfwrite(in_srf->mf->data, 1, sizeof(ztr_header_t), mf); mfseek(in_srf->mf, sizeof(ztr_header_t), SEEK_CUR); }else{ mfseek(in_srf->mf, in_srf->mf_pos, SEEK_SET); } int pos = mftell(in_srf->mf); while (chunk = ztr_read_chunk_hdr(in_srf->mf)) { char *key = ztr_lookup_mdata_value(ztr_tmp, chunk, "TYPE"); int flag = 0; /* filter on chunk type */ switch (chunk->type) { case ZTR_TYPE_BASE: if (chunk_mode & CHUNK_BASE) flag = 1; break; case ZTR_TYPE_CNF1: if (chunk_mode & CHUNK_CNF1) flag = 1; break; case ZTR_TYPE_CNF4: if (chunk_mode & CHUNK_CNF4) flag = 1; break; case ZTR_TYPE_REGN: if (rev_cycle && in_srf->mf_pos) { fprintf(stderr, "Added REGN chunk to trace header but found REGN chunk in trace body\n"); exit(1); } if (!rev_cycle) flag = 1; break; case ZTR_TYPE_SAMP: if (chunk_mode & CHUNK_SAMP) { if (mdata_mode == TYPE_ALL) flag = 1; if ((mdata_mode & TYPE_0FAM) && (key && 0 == strcmp(key, "0FAM"))) flag = 1; if ((mdata_mode & TYPE_1CY3) && (key && 0 == strcmp(key, "1CY3"))) flag = 1; if ((mdata_mode & TYPE_2TXR) && (key && 0 == strcmp(key, "2TXR"))) flag = 1; if ((mdata_mode & TYPE_3CY5) && (key && 0 == strcmp(key, "3CY5"))) flag = 1; } break; case ZTR_TYPE_SMP4: if (chunk_mode & CHUNK_SMP4) { if (mdata_mode == TYPE_ALL) flag = 1; if ((mdata_mode & TYPE_PROC) && (NULL == key || 0 == strcmp(key, "PROC"))) flag = 1; if ((mdata_mode & TYPE_SLXI) && (key && 0 == strcmp(key, "SLXI"))) flag = 1; if ((mdata_mode & TYPE_SLXN) && (key && 0 == strcmp(key, "SLXN"))) flag = 1; } break; default: flag = 1; break; } if (flag) mfwrite(in_srf->mf->data+pos, 1, (4+4+chunk->mdlength+4+chunk->dlength), mf); mfseek(in_srf->mf, chunk->dlength, SEEK_CUR); pos = mftell(in_srf->mf); if (chunk->mdata) xfree(chunk->mdata); xfree(chunk); } if (rev_cycle && !in_srf->mf_pos) { fprintf(stderr, "Adding REGN chunk to trace body\n"); if ( -1 == add_readpair_region(rev_cycle, mf)) { fprintf(stderr, "Failed to add to REGN chunk\n"); exit(1); } } /* output the trace header as required */ if( output_trace_header ) { output_trace_header = 0; if (0 != srf_write_trace_hdr(out_srf, &out_srf->th)) { fprintf(stderr, "Error writing trace header.\nExiting.\n"); exit(1); } } /* construct the new trace body */ srf_trace_body_t new_tb; srf_construct_trace_body(&new_tb, name+strlen(in_srf->th.id_prefix), -1, mf->data, mf->size, old_tb.flags); if (0 != srf_write_trace_body(out_srf, &new_tb)) { fprintf(stderr, "Error writing trace body.\nExiting.\n"); exit(1); } mfdestroy(mf); } if( ztr_tmp ) delete_ztr(ztr_tmp); break; } case -1: { /* are we really at the end of the srf file */ long pos = ftell(in_srf->fp); fseek(in_srf->fp, 0, SEEK_END); if( pos != ftell(in_srf->fp) ){ srf_destroy(in_srf, 1); fprintf(stderr, "srf file is corrupt\n"); return 1; } srf_destroy(in_srf, 1); return 0; } case SRFB_NULL_INDEX: { /* * Maybe the last 8 bytes of a the file (or previously was * last 8 bytes prior to concatenating SRF files together). * If so it's the index length and should always be 8 zeros. */ uint64_t ilen; if (1 != fread(&ilen, 8, 1, in_srf->fp)) { srf_destroy(in_srf, 1); fprintf(stderr, "srf file is corrupt\n"); return 1; } if (ilen != 0) { srf_destroy(in_srf, 1); fprintf(stderr, "srf file is corrupt\n"); return 1; } break; } case SRFB_INDEX: { long pos = ftell(in_srf->fp); srf_read_index_hdr(in_srf, &in_srf->hdr, 1); /* Skip the index body */ fseeko(in_srf->fp, pos + in_srf->hdr.size, SEEK_SET); break; } default: srf_destroy(in_srf, 1); fprintf(stderr, "Block of unknown type '%c'. Aborting\n", type); return 1; } } while (1); srf_destroy(in_srf, 1); return 0; } /* ------------------------------------------------------------------------ */ /* * Main method. */ int main(int argc, char **argv) { int nfiles, ifile; int filter_mode = 0; char *input = NULL; char *output = NULL; read_filter_t *read_filter = NULL; char *filter_value = NULL; int rev_cycle = 0; int c; int errflg = 0; extern char *optarg; extern int optind, optopt; char chunk_mode = CHUNK_ALL; char mdata_mode = TYPE_ALL; int verbose = 0; int read_mask = 0; srf_t *srf = NULL; while ((c = getopt(argc, argv, ":c:m:f:vb2:")) != -1) { switch (c) { case 'c': chunk_mode = 0; if(get_chunk_types(optarg, &chunk_mode) || !chunk_mode) { fprintf(stderr, "Invalid value \"%s\" given to option -%c.\n", optarg, c); errflg++; } break; case 'm': mdata_mode = 0; if(get_mdata_types(optarg, &mdata_mode) || !mdata_mode) { fprintf(stderr, "Invalid value \"%s\" given to option -%c.\n", optarg, c); errflg++; } break; case 'f': filter_mode++; filter_value = optarg; break; case 'v': verbose++; break; case 'b': read_mask = SRF_READ_FLAG_BAD_MASK; break; case '2': rev_cycle = atoi(optarg); break; case ':': /* -? without operand */ fprintf(stderr, "Option -%c requires an operand\n", optopt); errflg++; break; case '?': fprintf(stderr, "Unrecognised option: -%c\n", optopt); errflg++; } } if (errflg) { usage(1); } nfiles = (argc-optind); if( nfiles < 2 ){ fprintf(stderr, "Please specify input archive name(s) and an output archive name.\n"); usage(1); } output = argv[optind+nfiles-1]; nfiles--; if(filter_mode) { read_filter = get_read_filter(filter_value); if(verbose) { dump_read_filter(read_filter); } } if(chunk_mode && verbose) { dump_chunk_mode(chunk_mode); } if(mdata_mode && verbose) { dump_mdata_mode(mdata_mode); } if (NULL == (srf = srf_open(output, "wb"))) { perror(output); return 1; } for (ifile=0; ifile<nfiles; ifile++) { input = argv[optind+ifile]; printf("Reading archive %s.\n", input); if (0 != srf_filter(input, srf, chunk_mode, mdata_mode, filter_mode, read_filter, read_mask, rev_cycle)) { srf_destroy(srf, 1); remove(output); return 1; } } if(NULL != srf) srf_destroy(srf, 1); return 0; }