| 
0
 | 
     1 #include <string.h>
 | 
| 
 | 
     2 #include <stdlib.h>
 | 
| 
 | 
     3 #include <sys/stat.h>
 | 
| 
 | 
     4 #include <unistd.h>
 | 
| 
 | 
     5 #include "bcf.h"
 | 
| 
 | 
     6 
 | 
| 
 | 
     7 #include "kseq.h"
 | 
| 
 | 
     8 KSTREAM_INIT(gzFile, gzread, 0x10000)
 | 
| 
 | 
     9 
 | 
| 
 | 
    10 int bcfview(int argc, char *argv[]);
 | 
| 
 | 
    11 int bcf_main_index(int argc, char *argv[]);
 | 
| 
 | 
    12 
 | 
| 
 | 
    13 #define BUF_SIZE 0x10000
 | 
| 
 | 
    14 
 | 
| 
 | 
    15 int bcf_cat(int n, char * const *fn)
 | 
| 
 | 
    16 {
 | 
| 
 | 
    17 	int i;
 | 
| 
 | 
    18 	bcf_t *out;
 | 
| 
 | 
    19 	uint8_t *buf;
 | 
| 
 | 
    20 	buf = malloc(BUF_SIZE);
 | 
| 
 | 
    21 	out = bcf_open("-", "w");
 | 
| 
 | 
    22 	for (i = 0; i < n; ++i) {
 | 
| 
 | 
    23 		bcf_t *in;
 | 
| 
 | 
    24 		bcf_hdr_t *h;
 | 
| 
 | 
    25 		off_t end;
 | 
| 
 | 
    26 		struct stat s;
 | 
| 
 | 
    27 		in = bcf_open(fn[i], "r");
 | 
| 
 | 
    28 		h = bcf_hdr_read(in);
 | 
| 
 | 
    29 		if (i == 0) bcf_hdr_write(out, h);
 | 
| 
 | 
    30 		bcf_hdr_destroy(h);
 | 
| 
 | 
    31 #ifdef _USE_KNETFILE
 | 
| 
 | 
    32 		fstat(knet_fileno(in->fp->x.fpr), &s);
 | 
| 
 | 
    33 		end = s.st_size - 28;
 | 
| 
 | 
    34 		while (knet_tell(in->fp->x.fpr) < end) {
 | 
| 
 | 
    35 			int size = knet_tell(in->fp->x.fpr) + BUF_SIZE < end? BUF_SIZE : end - knet_tell(in->fp->x.fpr);
 | 
| 
 | 
    36 			knet_read(in->fp->x.fpr, buf, size);
 | 
| 
 | 
    37 			fwrite(buf, 1, size, out->fp->x.fpw);
 | 
| 
 | 
    38 		}
 | 
| 
 | 
    39 #else
 | 
| 
 | 
    40 		abort(); // FIXME: not implemented
 | 
| 
 | 
    41 #endif
 | 
| 
 | 
    42 		bcf_close(in);
 | 
| 
 | 
    43 	}
 | 
| 
 | 
    44 	bcf_close(out);
 | 
| 
 | 
    45 	free(buf);
 | 
| 
 | 
    46 	return 0;
 | 
| 
 | 
    47 }
 | 
| 
 | 
    48 
 | 
| 
 | 
    49 extern double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]);
 | 
| 
 | 
    50 
 | 
| 
 | 
    51 int bcf_main_ldpair(int argc, char *argv[])
 | 
| 
 | 
    52 {
 | 
| 
 | 
    53 	bcf_t *fp;
 | 
| 
 | 
    54 	bcf_hdr_t *h;
 | 
| 
 | 
    55 	bcf1_t *b0, *b1;
 | 
| 
 | 
    56 	bcf_idx_t *idx;
 | 
| 
 | 
    57 	kstring_t str;
 | 
| 
 | 
    58 	void *str2id;
 | 
| 
 | 
    59 	gzFile fplist;
 | 
| 
 | 
    60 	kstream_t *ks;
 | 
| 
 | 
    61 	int dret, lineno = 0;
 | 
| 
 | 
    62 	if (argc < 3) {
 | 
| 
 | 
    63 		fprintf(stderr, "Usage: bcftools ldpair <in.bcf> <in.list>\n");
 | 
| 
 | 
    64 		return 1;
 | 
| 
 | 
    65 	}
 | 
| 
 | 
    66 	fplist = gzopen(argv[2], "rb");
 | 
| 
 | 
    67 	ks = ks_init(fplist);
 | 
| 
 | 
    68 	memset(&str, 0, sizeof(kstring_t));
 | 
| 
 | 
    69 	fp = bcf_open(argv[1], "rb");
 | 
| 
 | 
    70 	h = bcf_hdr_read(fp);
 | 
| 
 | 
    71 	str2id = bcf_build_refhash(h);
 | 
| 
 | 
    72 	idx = bcf_idx_load(argv[1]);
 | 
| 
 | 
    73 	if (idx == 0) {
 | 
| 
 | 
    74 		fprintf(stderr, "[%s] No bcf index is found. Abort!\n", __func__);
 | 
| 
 | 
    75 		return 1;
 | 
| 
 | 
    76 	}
 | 
| 
 | 
    77 	b0 = calloc(1, sizeof(bcf1_t));
 | 
| 
 | 
    78 	b1 = calloc(1, sizeof(bcf1_t));
 | 
| 
 | 
    79 	while (ks_getuntil(ks, '\n', &str, &dret) >= 0) {
 | 
| 
 | 
    80 		char *p, *q;
 | 
| 
 | 
    81 		int k;
 | 
| 
 | 
    82 		int tid0 = -1, tid1 = -1, pos0 = -1, pos1 = -1;
 | 
| 
 | 
    83 		++lineno;
 | 
| 
 | 
    84 		for (p = q = str.s, k = 0; *p; ++p) {
 | 
| 
 | 
    85 			if (*p == ' ' || *p == '\t') {
 | 
| 
 | 
    86 				*p = '\0';
 | 
| 
 | 
    87 				if (k == 0) tid0 = bcf_str2id(str2id, q);
 | 
| 
 | 
    88 				else if (k == 1) pos0 = atoi(q) - 1;
 | 
| 
 | 
    89 				else if (k == 2) tid1 = strcmp(q, "=")? bcf_str2id(str2id, q) : tid0;
 | 
| 
 | 
    90 				else if (k == 3) pos1 = atoi(q) - 1;
 | 
| 
 | 
    91 				q = p + 1;
 | 
| 
 | 
    92 				++k;
 | 
| 
 | 
    93 			}
 | 
| 
 | 
    94 		}
 | 
| 
 | 
    95 		if (k == 3) pos1 = atoi(q) - 1;
 | 
| 
 | 
    96 		if (tid0 >= 0 && tid1 >= 0 && pos0 >= 0 && pos1 >= 0) {
 | 
| 
 | 
    97 			uint64_t off;
 | 
| 
 | 
    98 			double r, f[4];
 | 
| 
 | 
    99 			off = bcf_idx_query(idx, tid0, pos0);
 | 
| 
 | 
   100 			bgzf_seek(fp->fp, off, SEEK_SET);
 | 
| 
 | 
   101 			while (bcf_read(fp, h, b0) >= 0 && b0->pos != pos0);
 | 
| 
 | 
   102 			off = bcf_idx_query(idx, tid1, pos1);
 | 
| 
 | 
   103 			bgzf_seek(fp->fp, off, SEEK_SET);
 | 
| 
 | 
   104 			while (bcf_read(fp, h, b1) >= 0 && b1->pos != pos1);
 | 
| 
 | 
   105 			r = bcf_pair_freq(b0, b1, f);
 | 
| 
 | 
   106 			r *= r;
 | 
| 
 | 
   107 			printf("%s\t%d\t%s\t%d\t%.4g\t%.4g\t%.4g\t%.4g\t%.4g\n", h->ns[tid0], pos0+1, h->ns[tid1], pos1+1,
 | 
| 
 | 
   108 				r, f[0], f[1], f[2], f[3]);
 | 
| 
 | 
   109 		} //else fprintf(stderr, "[%s] Parse error at line %d.\n", __func__, lineno);
 | 
| 
 | 
   110 	}
 | 
| 
 | 
   111 	bcf_destroy(b0); bcf_destroy(b1);
 | 
| 
 | 
   112 	bcf_idx_destroy(idx);
 | 
| 
 | 
   113 	bcf_str2id_destroy(str2id);
 | 
| 
 | 
   114 	bcf_hdr_destroy(h);
 | 
| 
 | 
   115 	bcf_close(fp);
 | 
| 
 | 
   116 	free(str.s);
 | 
| 
 | 
   117 	ks_destroy(ks);
 | 
| 
 | 
   118 	gzclose(fplist);
 | 
| 
 | 
   119 	return 0;
 | 
| 
 | 
   120 }
 | 
| 
 | 
   121 
 | 
| 
 | 
   122 int bcf_main_ld(int argc, char *argv[])
 | 
| 
 | 
   123 {
 | 
| 
 | 
   124 	bcf_t *fp;
 | 
| 
 | 
   125 	bcf_hdr_t *h;
 | 
| 
 | 
   126 	bcf1_t **b, *b0;
 | 
| 
 | 
   127 	int i, j, m, n;
 | 
| 
 | 
   128 	double f[4];
 | 
| 
 | 
   129 	if (argc == 1) {
 | 
| 
 | 
   130 		fprintf(stderr, "Usage: bcftools ld <in.bcf>\n");
 | 
| 
 | 
   131 		return 1;
 | 
| 
 | 
   132 	}
 | 
| 
 | 
   133 	fp = bcf_open(argv[1], "rb");
 | 
| 
 | 
   134 	h = bcf_hdr_read(fp);
 | 
| 
 | 
   135 	// read the entire BCF
 | 
| 
 | 
   136 	m = n = 0; b = 0;
 | 
| 
 | 
   137 	b0 = calloc(1, sizeof(bcf1_t));
 | 
| 
 | 
   138 	while (bcf_read(fp, h, b0) >= 0) {
 | 
| 
 | 
   139 		if (m == n) {
 | 
| 
 | 
   140 			m = m? m<<1 : 16;
 | 
| 
 | 
   141 			b = realloc(b, sizeof(void*) * m);
 | 
| 
 | 
   142 		}
 | 
| 
 | 
   143 		b[n] = calloc(1, sizeof(bcf1_t));
 | 
| 
 | 
   144 		bcf_cpy(b[n++], b0);
 | 
| 
 | 
   145 	}
 | 
| 
 | 
   146 	bcf_destroy(b0);
 | 
| 
 | 
   147 	// compute pair-wise r^2
 | 
| 
 | 
   148 	printf("%d\n", n); // the number of loci
 | 
| 
 | 
   149 	for (i = 0; i < n; ++i) {
 | 
| 
 | 
   150 		printf("%s:%d", h->ns[b[i]->tid], b[i]->pos + 1);
 | 
| 
 | 
   151 		for (j = 0; j < i; ++j) {
 | 
| 
 | 
   152 			double r = bcf_pair_freq(b[i], b[j], f);
 | 
| 
 | 
   153 			printf("\t%.3f", r*r);
 | 
| 
 | 
   154 		}
 | 
| 
 | 
   155 		printf("\t1.000\n");
 | 
| 
 | 
   156 	}
 | 
| 
 | 
   157 	// free
 | 
| 
 | 
   158 	for (i = 0; i < n; ++i) bcf_destroy(b[i]);
 | 
| 
 | 
   159 	free(b);
 | 
| 
 | 
   160 	bcf_hdr_destroy(h);
 | 
| 
 | 
   161 	bcf_close(fp);
 | 
| 
 | 
   162 	return 0;
 | 
| 
 | 
   163 }
 | 
| 
 | 
   164 
 | 
| 
 | 
   165 int main(int argc, char *argv[])
 | 
| 
 | 
   166 {
 | 
| 
 | 
   167 	if (argc == 1) {
 | 
| 
 | 
   168 		fprintf(stderr, "\n");
 | 
| 
 | 
   169 		fprintf(stderr, "Usage:   bcftools <command> <arguments>\n\n");
 | 
| 
 | 
   170 		fprintf(stderr, "Command: view      print, extract, convert and call SNPs from BCF\n");
 | 
| 
 | 
   171 		fprintf(stderr, "         index     index BCF\n");
 | 
| 
 | 
   172 		fprintf(stderr, "         cat       concatenate BCFs\n");
 | 
| 
 | 
   173 		fprintf(stderr, "         ld        compute all-pair r^2\n");
 | 
| 
 | 
   174 		fprintf(stderr, "         ldpair    compute r^2 between requested pairs\n");
 | 
| 
 | 
   175 		fprintf(stderr, "\n");
 | 
| 
 | 
   176 		return 1;
 | 
| 
 | 
   177 	}
 | 
| 
 | 
   178 	if (strcmp(argv[1], "view") == 0) return bcfview(argc-1, argv+1);
 | 
| 
 | 
   179 	else if (strcmp(argv[1], "index") == 0) return bcf_main_index(argc-1, argv+1);
 | 
| 
 | 
   180 	else if (strcmp(argv[1], "ld") == 0) return bcf_main_ld(argc-1, argv+1);
 | 
| 
 | 
   181 	else if (strcmp(argv[1], "ldpair") == 0) return bcf_main_ldpair(argc-1, argv+1);
 | 
| 
 | 
   182 	else if (strcmp(argv[1], "cat") == 0) return bcf_cat(argc-2, argv+2); // cat is different ...
 | 
| 
 | 
   183 	else {
 | 
| 
 | 
   184 		fprintf(stderr, "[main] Unrecognized command.\n");
 | 
| 
 | 
   185 		return 1;
 | 
| 
 | 
   186 	}
 | 
| 
 | 
   187 	return 0;
 | 
| 
 | 
   188 }
 |