changeset 29:fb944979bf35

Update to Miller Lab devshed revision 5f0be4d1db30
author Richard Burhans <burhans@bx.psu.edu>
date Thu, 25 Jul 2013 12:01:47 -0400
parents 184d14e4270d
children 4188853b940b
files genome_diversity/src/dpmix.c genome_diversity/src/sweep.c
diffstat 2 files changed, 47 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/genome_diversity/src/dpmix.c	Wed Jul 17 12:46:46 2013 -0400
+++ b/genome_diversity/src/dpmix.c	Thu Jul 25 12:01:47 2013 -0400
@@ -53,8 +53,6 @@
 #include "lib.h"
 #include <math.h>
 
-//#define ABT
-
 // maximum length of a line from the table
 #define MOST 50000
 
@@ -86,11 +84,14 @@
 	int b, e;
 } H[MOST];
 
+#define MAX_CHR_NAME 1000000
+
 // global variables
 int *B[7],	// backpointer to state at the previous SNP (or event)
     *P;		// chromosome position
-int nH, nI, nG, genotypes, nsnp, debug, chr_col, logs, last_snp_pos, pop3;
-char this_chr[100];
+int nH, nI, nG, genotypes, nsnp, debug, chr_col, logs, last_snp_pos, pop3,
+ nchr_name;
+char this_chr[100], *chr_name[MAX_CHR_NAME];
 double switch_penalty;
 char buf[MOST], *status;
 FILE *fp, *out;
@@ -220,10 +221,6 @@
 				B[state][i] = state;
 			continue;
 		}
-#ifdef ABT
-fprintf(stderr, "%d: ABT genotype=%d, KHO %f,%f YRI %f,%f\n",
-  p->pos, p->g[a], p->F1, 1.0-p->F1, p->F2, 1.0-p->F2);
-#endif
 		
 		for (state = 0; state < 7; ++state) {
 			// find highest path-score from this event onward
@@ -237,21 +234,8 @@
 			B[state][i] = m;
 			ff[state] = f[m];
 			if (state > 0 && p->type == 0)
-{
 				ff[state] +=
 				    score(p->F1, p->F2, p->F3, p->g[a], state);
-#ifdef ABT
-if (state == 2)
-        fprintf(stderr, "     YRI:");
-if (state == 4)
-        fprintf(stderr, "  hetero:");
-if (state == 1)
-        fprintf(stderr, "     KHO:");
-if (pop3 || state == 1 || state == 2 || state == 4)
-fprintf(stderr, "  score=%f ff=%f B = %d\n",
-        score(p->F1, p->F2, p->F3, p->g[a], state), ff[state], m);
-#endif
-}
 		}
 		if (p->type == 1) {
 			// start of heterochomatic interval. Force paths
@@ -269,7 +253,7 @@
 			for (j = 0; j < 7; ++j) {
 				if (pop3 || j == 3 || j == 5 || j == 6)
 				  fprintf(stderr, " %f(%d)", from[j], B[j][i]);
-		}
+			}
 			putc('\n', stderr);
 		}
 	}
@@ -279,10 +263,6 @@
 		if (from[j] > from[state])
 			state = j;
 
-#ifdef ABT
-fprintf(stderr, "start backtrace in state %d\n", state);
-#endif
-
 	// trace back to find the switch points
 	// A[a].x[state] records the total length of intervals in each state
 	for (prev_pos = i = 0; i < nsnp; ++i) {
@@ -323,14 +303,15 @@
 */
 void one_chr() {
 	char *s, *z = "\t\n";
-	int X[MOST], n, i, g, A1, B1, A2, B2, A3, B3, a, do_read, p, pos, het;
-	// int j, k, tied;
+	int X[MOST], n, i, g, A1, B1, A2, B2, A3, B3, a, do_read, p, pos, het,
+	  old_pos;
 	struct snp *new;
 	double F1, F2, F3;
-	// doublewinner, try;
 
 	strcpy(this_chr, get_chr_name());
-	nsnp = 0;
+	if (nchr_name == 0)
+		chr_name[nchr_name++] = copy_string(this_chr);
+	old_pos = nsnp = 0;
 	last = NULL;
 	// advance to this chromosome in the list of heterochromatic intervals
 	for (het = 0; het < nH && !same_string(this_chr, H[het].chr); ++het)
@@ -339,8 +320,17 @@
 	for (do_read = 0; ; do_read = 1) {
 		if (do_read && (status = fgets(buf, MOST, fp)) == NULL)
 			break; 
-		if (!same_string(get_chr_name(), this_chr))
+		if (!same_string(s = get_chr_name(), this_chr)) {
+			if (nchr_name >= MAX_CHR_NAME)
+				fatal("Too many chromosome names");
+			for (i = 0;
+			     i < nchr_name && !same_string(s, chr_name[i]); ++i)
+				;
+			if (i < nchr_name)
+				fatalf("SNVs on %s aren't together", s);
+			chr_name[nchr_name++] = copy_string(s);
 			break;
+		}
 		
 		// set X[i] = atoi(i-th word of buf), i is base 1
 		for (i = 1, s = strtok(buf, z); s != NULL;
@@ -350,6 +340,9 @@
 		// insert events (pseudo-SNPs) for heterochomatin intervals
 		// coming before the SNP
 		pos = X[chr_col+1];
+		if (pos <= old_pos)
+			fatalf("SNP at %s %d is out of order", this_chr, pos);
+		old_pos = pos;
 		while (het < nH && same_string(this_chr, H[het].chr) &&
 		   H[het].b < pos) {
 			add_het(H[het].b, 1);
@@ -373,7 +366,7 @@
 				if (g == -1)
 					continue;
 				if (g < 0 || g > 2)
-					fatalf("invalid genotype %d", g);
+					fatalf("invalid genotype %d in column %d, pos %d", g, n+2, X[2]);
 				if (p == 1) {
 					A1 += g;
 					B1 += (2 - g);
--- a/genome_diversity/src/sweep.c	Wed Jul 17 12:46:46 2013 -0400
+++ b/genome_diversity/src/sweep.c	Thu Jul 25 12:01:47 2013 -0400
@@ -56,6 +56,10 @@
 } W[MAX_WINDOW];
 int nW;
 
+#define MAX_CHR_NAME 1000000
+char *chr_name[MAX_CHR_NAME];
+int nchr_name;
+
 // return the linked list of SNPs in positions b to e
 struct snp *add_snps(int b, int e) {
 	struct snp *first = NULL, *last = NULL, *new;
@@ -95,7 +99,8 @@
 int get_chr(FILE *fp, int chr_col, int pos_col, int score_col, char *chr) {
 	static char buf[BUF_SIZE];
 	static int init = 1;
-	char *status;
+	int old_pos = 0, p, i;
+	char *status, *s;
 
 	if (init) {
 		while ((status = fgets(buf, BUF_SIZE, fp)) != NULL &&
@@ -111,6 +116,9 @@
 	if (buf[0] == '#')
 		fatal("cannot happen");
 	strcpy(chr, get_col(buf, chr_col));
+	if (nchr_name == 0)
+		chr_name[nchr_name++] = copy_string(chr);
+
 	S[0].pos = atoi(get_col(buf, pos_col));
 	if (S[0].pos < 0)
 		fatalf("remove unmapped SNPs (address = -1)");
@@ -120,13 +128,24 @@
 			buf[0] = '\0';
 			return 1;
 		}
-		if (!same_string(chr, get_col(buf, chr_col)))
+		if (!same_string(chr, s = get_col(buf, chr_col)))
 			break;
-		S[nS].pos = atoi(get_col(buf, pos_col));
+		S[nS].pos = p = atoi(get_col(buf, pos_col));
+		if (p <= old_pos)
+			fatalf("SNV at %s %d is out of order", chr, p);
+		old_pos = p;
 		if (S[nS].pos < 0)
 			fatalf("remove unmapped SNPs (address = -1)");
 		S[nS].x = atof(get_col(buf, score_col));
 	}
+	if (nchr_name >= MAX_CHR_NAME)
+		fatal("Too many chromosome names");
+	for (i = 0; i < nchr_name && !same_string(s, chr_name[i]); ++i)
+		;
+	if (i < nchr_name)
+		fatalf("SNVs on %s aren't together", s);
+	chr_name[nchr_name++] = copy_string(s);
+
 	return 1;
 }