Mercurial > repos > miller-lab > genome_diversity
view genome_diversity/src/mito_lib.c @ 24:248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Tue, 28 May 2013 16:24:19 -0400 |
parents | |
children |
line wrap: on
line source
// mito_data.c -- shared procedures to read SNP and coverage file for // mitogenomes #include "lib.h" #include "mito_lib.h" #define ADHOC // get the adequately covered intervals for each specified individual; // merge adjacent intervals void get_intervals(char *filename) { FILE *fp = ckopen(filename, "r"); char buf[500], name[100]; struct interv *p, *new; int i, b, e, cov; while (fgets(buf, 500, fp)) { if (sscanf(buf, "%*s %d %d %d %s", &b, &e, &cov, name) != 4) fatalf("interval: %s", buf); if (cov < min_cov) continue; for (i = 0; i < nM && !same_string(M[i].name, name); ++i) ; if (i == nM) continue; if (M[i].last != NULL && M[i].last->e == b) { // merge with adjacent interval M[i].last->e = e; continue; } new = ckalloc(sizeof(*new)); new->b = b; new->e = e; new->next = NULL; if ((p = M[i].last) == NULL) M[i].intervals = new; else p->next = new; M[i].last = new; } fclose(fp); /* for (i = 0; i < nM; ++i) { printf("%s:", M[i].name); for (p = M[i].intervals; p; p = p->next) printf(" %d-%d", p->b, p->e); putchar('\n'); } */ } // get the SNPs; for each SNP set the array of (first characters from) // genotypes of the specified samples (individuals) int get_variants(char *filename, struct snp *S, int refcol) { FILE *fp = ckopen(filename, "r"); char buf[5000], *s, *f[101], *z = " \t\n\0"; int i, n, c; for (i = 0; i <= 100; ++i) f[i] = NULL; for (n = 0; fgets(buf, 500, fp); ++n) { if (buf[0] == '#') { --n; continue; } if (n >= MAX_SNP) fatal("too many SNPs"); if (sscanf(buf, "%*s %d", &(S[n].pos)) != 1) fatalf("pos : %s", buf); S[n].g = ckalloc((nM+1)*(sizeof(char))); S[n].g[nM] = '\0'; for (i = 0; i <= 100; ++i) if (f[i] != NULL) free(f[i]); for (i = 1, s = strtok(buf, z); s; s = strtok(NULL, z), ++i) f[i] = copy_string(s); for (i = 0; i < nM; ++i) { // genotype is 2 columns past the individual's 1st // column // AD HOC RULE: IF THERE IS ONE READ OF EACH ALLELE, // THEN IGNORE THE SNP. c = M[i].col; if (refcol == 0) S[n].g[i] = f[c+2][0]; else if (same_string(f[refcol+2], f[c+2])) S[n].g[i] = '2'; else S[n].g[i] = '0'; #ifdef ADHOC if (same_string(f[c], "1") && same_string(f[c+1], "1")) S[n].g[i] = '-'; #endif } } fclose(fp); return n; }