Mercurial > repos > dawe > srf2fastq
diff srf2fastq/io_lib-1.12.2/progs/extract_qual.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/srf2fastq/io_lib-1.12.2/progs/extract_qual.c Tue Jun 07 17:48:05 2011 -0400 @@ -0,0 +1,354 @@ +/* + * Copyright (c) Medical Research Council 1994-1999. All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation for any purpose is hereby granted without fee, provided that + * this copyright and notice appears in all copies. + * + * This file was written by James Bonfield, Simon Dear, Rodger Staden, + * as part of the Staden Package at the MRC Laboratory of Molecular + * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom. + * + * MRC disclaims all warranties with regard to this software. + */ + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <strings.h> +#include <stdlib.h> +#include <unistd.h> +#include <io_lib/Read.h> +#include <io_lib/traceType.h> +#include <io_lib/expFileIO.h> +#include <io_lib/open_trace_file.h> +#include <io_lib/xalloc.h> + +/* #include "stdio_hack.h" */ + +#define LINE_LENGTH 60 + +/* + * Converts the confidence array to the accuracy value string (AV). + * + * Note no memory overrun checks are performed on buf. It is recommended + * that it is allocated to 4*len (worst case of "100 " for each base). + * + * Returns the buf argument. + */ +static char *my_conf2str(int1 *conf, int len, char *buf) { + int i; + char *ret = buf, *rs = buf; + + for (i = 0; i < len; i++) { + sprintf(buf, "%d ", conf[i]); + buf += strlen(buf); + + if (buf - rs > LINE_LENGTH) { + *buf++ = '\n'; + *buf = '\0'; + rs = buf; + } + } + + return ret; +} + +static int do_trans(mFILE *infp, char *in_file, FILE *outfp, int format, + int good_only, int clip_cosmid, int fasta_out) { + Read *r; + char *tmp_prob_A, *tmp_prob_C, *tmp_prob_G, *tmp_prob_T; + char *cstr = NULL; + + read_sections(READ_BASES); + if (NULL == (r = mfread_reading(infp, in_file, format))) { + fprintf(stderr, "Failed to read file '%s'\n", in_file); + return 1; + } + + tmp_prob_A = r->prob_A; + tmp_prob_C = r->prob_C; + tmp_prob_G = r->prob_G; + tmp_prob_T = r->prob_T; + +#ifdef IOLIB_EXP + if (good_only && r->orig_trace_format == TT_EXP) { + int left=0, right=r->NBases + 1, val, lval, rval; + Exp_info *e = (Exp_info *)r->orig_trace; + + if (0 == exp_get_int(e, EFLT_SL, &val)) + if (val > left) + left = val; + if (0 == exp_get_int(e, EFLT_QL, &val)) + if (val > left) + left = val; + + if (0 == exp_get_int(e, EFLT_SR, &val)) + if (val < right) + right = val; + if (0 == exp_get_int(e, EFLT_QR, &val)) + if (val < right) + right = val; + + /* This is horrid - see gap seqInfo.c file for explaination */ + if (clip_cosmid) { + int got_cosmid; + + if (0 == exp_get_rng(e, EFLT_CS, &lval, &rval)) { + got_cosmid = 1; + } else if (0 == exp_get_int(e, EFLT_CL, &lval) && + 0 == exp_get_int(e, EFLT_CR, &rval)) { + got_cosmid = 1; + } else { + got_cosmid = 0; + } + + if (got_cosmid) { + if (lval <= left && rval <= left) ; + else if (lval <= left+1 && rval < right) left = rval; + else if (lval <= left+1 && rval >= right) right = left+1; + else if (lval < right && rval < right) right = lval; + else if (lval < right && rval >= right) right = lval; + } + } + + r->prob_A += left; + r->prob_C += left; + r->prob_G += left; + r->prob_T += left; + r->NBases = right - left - 1; + } else +#endif /* IOLIB_EXP */ + if (good_only) { + r->prob_A += r->leftCutoff; + r->prob_C += r->leftCutoff; + r->prob_G += r->leftCutoff; + r->prob_T += r->leftCutoff; + r->NBases = r->rightCutoff - r->leftCutoff - 1; + } + + /* Confidence values */ + if (r->prob_A && r->prob_C && r->prob_G && r->prob_T && + r->NBases > 0) { + int i; + + /* We have some, but are they non zero values? */ + for (i = 0; i < r->NBases; i++) { + if (r->prob_A[i] || r->prob_C[i] || + r->prob_G[i] || r->prob_T[i]) + break; + } + if (i != r->NBases) { + int1 *conf = (int1 *)xmalloc(r->NBases); + cstr = (char *)xmalloc(r->NBases * 4 + 2); + + for (i = 0; i < r->NBases; i++) { + switch (r->base[i]) { + case 'a': + case 'A': + conf[i] = r->prob_A[i]; + break; + case 'c': + case 'C': + conf[i] = r->prob_C[i]; + break; + case 'g': + case 'G': + conf[i] = r->prob_G[i]; + break; + case 't': + case 'T': + conf[i] = r->prob_T[i]; + break; + case 'b': + case 'B': + conf[i] = (r->prob_C[i] + r->prob_G[i] + r->prob_T[i]) / 3; + break; + case 'd': + case 'D': + conf[i] = (r->prob_A[i] + r->prob_G[i] + r->prob_T[i]) / 3; + break; + case 'h': + case 'H': + conf[i] = (r->prob_A[i] + r->prob_C[i] + r->prob_T[i]) / 3; + break; + case 'v': + case 'V': + conf[i] = (r->prob_A[i] + r->prob_C[i] + r->prob_G[i]) / 3; + break; + case 'k': + case 'K': + conf[i] = (r->prob_G[i] + r->prob_T[i]) / 2; + break; + case 'm': + case 'M': + conf[i] = (r->prob_A[i] + r->prob_C[i]) / 2; + break; + case 'r': + case 'R': + conf[i] = (r->prob_A[i] + r->prob_G[i]) / 2; + break; + case 's': + case 'S': + conf[i] = (r->prob_C[i] + r->prob_G[i]) / 2; + break; + case 'w': + case 'W': + conf[i] = (r->prob_A[i] + r->prob_T[i]) / 2; + break; + case 'y': + case 'Y': + conf[i] = (r->prob_C[i] + r->prob_T[i]) / 2; + break; + default: + conf[i] = (r->prob_A[i] + r->prob_C[i] + r->prob_G[i] + r->prob_T[i]) / 4; + } + } + + my_conf2str(conf, r->NBases, cstr); + xfree(conf); + } + } + + if (fasta_out) { + char *p = strrchr(in_file, '/'); + /* Add header */ + if (NULL == p) + p = in_file; + else + p++; + fprintf(outfp, ">%s\n", p); + } + + if (cstr) { + fprintf(outfp,"%s\n", cstr); + xfree(cstr); + } + + r->prob_A = tmp_prob_A; + r->prob_C = tmp_prob_C; + r->prob_G = tmp_prob_G; + r->prob_T = tmp_prob_T; + read_deallocate(r); + fflush(outfp); + + return 0; +} + +static void usage(void) { + fprintf(stderr, "Usage: extract_qual [-r] [-(abi|alf|scf|exp|pln|ctf|ztr)]\n" + " [-good_only] [-clip_cosmid] [-fasta_out]\n" + " [-output output_name] [input_name] ...\n"); + exit(1); +} + +int main(int argc, char **argv) { + int from_stdin = 1; + mFILE *infp = mstdin(); + FILE *outfp = stdout; + int format = TT_ANY; + int redirect = 1; + int good_only = 0; + int clip_cosmid = 0; + int fasta_out = 0; + int ret = 0; + char *fofn = NULL; + + for (argc--, argv++; argc > 0; argc--, argv++) { + if (strcmp(*argv, "-r") == 0) { + redirect = 0; + } else if (strcasecmp(*argv, "-abi") == 0) { + format = TT_ABI; + } else if (strcasecmp(*argv, "-alf") == 0) { + format = TT_ALF; + } else if (strcasecmp(*argv, "-scf") == 0) { + format = TT_SCF; + } else if (strcasecmp(*argv, "-exp") == 0) { + format = TT_EXP; + } else if (strcasecmp(*argv, "-pln") == 0) { + format = TT_PLN; + } else if (strcasecmp(*argv, "-ztr") == 0) { + format = TT_ZTR; + } else if (strcasecmp(*argv, "-ctf") == 0) { + format = TT_CTF; + } else if (strcasecmp(*argv, "-good_only") == 0) { + good_only = 1; + } else if (strcasecmp(*argv, "-clip_cosmid") == 0) { + clip_cosmid = 1; + } else if (strcasecmp(*argv, "-fasta_out") == 0) { + fasta_out = 1; + } else if (strcmp(*argv, "-fofn") == 0) { + fofn = *++argv; + argc--; + from_stdin = 0; + } else if (strcasecmp(*argv, "-output") == 0) { + if (NULL == (outfp = fopen(*++argv, "wb"))) { + perror(*argv); + return 1; + } + argc--; + } else if (**argv != '-') { + from_stdin = 0; + break; + } else { + usage(); + } + } + + read_experiment_redirect(redirect); + + if (!from_stdin) { + if (fofn) { + FILE *fofn_fp; + char line[8192]; + + if (strcmp(fofn, "stdin") == 0) + fofn_fp = stdin; + else + fofn_fp = fopen(fofn, "r"); + + if (fofn_fp) { + while (fgets(line, 8192, fofn_fp) != NULL) { + char *cp; + if (cp = strchr(line, '\n')) + *cp = 0; + if (format == TT_EXP) { + infp = open_exp_mfile(line, NULL); + } else { + infp = open_trace_mfile(line, NULL); + } + if (NULL == infp) { + perror(line); + ret = 1; + } else { + ret |= do_trans(infp, line, outfp, format, good_only, + clip_cosmid, fasta_out); + mfclose(infp); + } + } + fclose(fofn_fp); + } + } + for (;argc > 0; argc--, argv++) { + if (format == TT_EXP) { + infp = open_exp_mfile(*argv, NULL); + } else { + infp = open_trace_mfile(*argv, NULL); + } + if (NULL == infp) { + perror(*argv); + ret = 1; + } else { + ret |= do_trans(infp, *argv, outfp, format, good_only, + clip_cosmid, fasta_out); + mfclose(infp); + } + } + } else { + ret = do_trans(infp, "<stdin>", outfp, format, good_only, clip_cosmid, + fasta_out); + } + + return ret; +} +