Mercurial > repos > siyuan > prada
comparison pyPRADA_1.2/tools/samtools-0.1.16/bcftools/main.c @ 0:acc2ca1a3ba4
Uploaded
| author | siyuan |
|---|---|
| date | Thu, 20 Feb 2014 00:44:58 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:acc2ca1a3ba4 |
|---|---|
| 1 #include <string.h> | |
| 2 #include <stdlib.h> | |
| 3 #include <sys/stat.h> | |
| 4 #include <unistd.h> | |
| 5 #include "bcf.h" | |
| 6 | |
| 7 #include "kseq.h" | |
| 8 KSTREAM_INIT(gzFile, gzread, 0x10000) | |
| 9 | |
| 10 int bcfview(int argc, char *argv[]); | |
| 11 int bcf_main_index(int argc, char *argv[]); | |
| 12 | |
| 13 #define BUF_SIZE 0x10000 | |
| 14 | |
| 15 int bcf_cat(int n, char * const *fn) | |
| 16 { | |
| 17 int i; | |
| 18 bcf_t *out; | |
| 19 uint8_t *buf; | |
| 20 buf = malloc(BUF_SIZE); | |
| 21 out = bcf_open("-", "w"); | |
| 22 for (i = 0; i < n; ++i) { | |
| 23 bcf_t *in; | |
| 24 bcf_hdr_t *h; | |
| 25 off_t end; | |
| 26 struct stat s; | |
| 27 in = bcf_open(fn[i], "r"); | |
| 28 h = bcf_hdr_read(in); | |
| 29 if (i == 0) bcf_hdr_write(out, h); | |
| 30 bcf_hdr_destroy(h); | |
| 31 #ifdef _USE_KNETFILE | |
| 32 fstat(knet_fileno(in->fp->x.fpr), &s); | |
| 33 end = s.st_size - 28; | |
| 34 while (knet_tell(in->fp->x.fpr) < end) { | |
| 35 int size = knet_tell(in->fp->x.fpr) + BUF_SIZE < end? BUF_SIZE : end - knet_tell(in->fp->x.fpr); | |
| 36 knet_read(in->fp->x.fpr, buf, size); | |
| 37 fwrite(buf, 1, size, out->fp->x.fpw); | |
| 38 } | |
| 39 #else | |
| 40 abort(); // FIXME: not implemented | |
| 41 #endif | |
| 42 bcf_close(in); | |
| 43 } | |
| 44 bcf_close(out); | |
| 45 free(buf); | |
| 46 return 0; | |
| 47 } | |
| 48 | |
| 49 extern double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]); | |
| 50 | |
| 51 int bcf_main_ldpair(int argc, char *argv[]) | |
| 52 { | |
| 53 bcf_t *fp; | |
| 54 bcf_hdr_t *h; | |
| 55 bcf1_t *b0, *b1; | |
| 56 bcf_idx_t *idx; | |
| 57 kstring_t str; | |
| 58 void *str2id; | |
| 59 gzFile fplist; | |
| 60 kstream_t *ks; | |
| 61 int dret, lineno = 0; | |
| 62 if (argc < 3) { | |
| 63 fprintf(stderr, "Usage: bcftools ldpair <in.bcf> <in.list>\n"); | |
| 64 return 1; | |
| 65 } | |
| 66 fplist = gzopen(argv[2], "rb"); | |
| 67 ks = ks_init(fplist); | |
| 68 memset(&str, 0, sizeof(kstring_t)); | |
| 69 fp = bcf_open(argv[1], "rb"); | |
| 70 h = bcf_hdr_read(fp); | |
| 71 str2id = bcf_build_refhash(h); | |
| 72 idx = bcf_idx_load(argv[1]); | |
| 73 if (idx == 0) { | |
| 74 fprintf(stderr, "[%s] No bcf index is found. Abort!\n", __func__); | |
| 75 return 1; | |
| 76 } | |
| 77 b0 = calloc(1, sizeof(bcf1_t)); | |
| 78 b1 = calloc(1, sizeof(bcf1_t)); | |
| 79 while (ks_getuntil(ks, '\n', &str, &dret) >= 0) { | |
| 80 char *p, *q; | |
| 81 int k; | |
| 82 int tid0 = -1, tid1 = -1, pos0 = -1, pos1 = -1; | |
| 83 ++lineno; | |
| 84 for (p = q = str.s, k = 0; *p; ++p) { | |
| 85 if (*p == ' ' || *p == '\t') { | |
| 86 *p = '\0'; | |
| 87 if (k == 0) tid0 = bcf_str2id(str2id, q); | |
| 88 else if (k == 1) pos0 = atoi(q) - 1; | |
| 89 else if (k == 2) tid1 = strcmp(q, "=")? bcf_str2id(str2id, q) : tid0; | |
| 90 else if (k == 3) pos1 = atoi(q) - 1; | |
| 91 q = p + 1; | |
| 92 ++k; | |
| 93 } | |
| 94 } | |
| 95 if (k == 3) pos1 = atoi(q) - 1; | |
| 96 if (tid0 >= 0 && tid1 >= 0 && pos0 >= 0 && pos1 >= 0) { | |
| 97 uint64_t off; | |
| 98 double r, f[4]; | |
| 99 off = bcf_idx_query(idx, tid0, pos0); | |
| 100 bgzf_seek(fp->fp, off, SEEK_SET); | |
| 101 while (bcf_read(fp, h, b0) >= 0 && b0->pos != pos0); | |
| 102 off = bcf_idx_query(idx, tid1, pos1); | |
| 103 bgzf_seek(fp->fp, off, SEEK_SET); | |
| 104 while (bcf_read(fp, h, b1) >= 0 && b1->pos != pos1); | |
| 105 r = bcf_pair_freq(b0, b1, f); | |
| 106 r *= r; | |
| 107 printf("%s\t%d\t%s\t%d\t%.4g\t%.4g\t%.4g\t%.4g\t%.4g\n", h->ns[tid0], pos0+1, h->ns[tid1], pos1+1, | |
| 108 r, f[0], f[1], f[2], f[3]); | |
| 109 } //else fprintf(stderr, "[%s] Parse error at line %d.\n", __func__, lineno); | |
| 110 } | |
| 111 bcf_destroy(b0); bcf_destroy(b1); | |
| 112 bcf_idx_destroy(idx); | |
| 113 bcf_str2id_destroy(str2id); | |
| 114 bcf_hdr_destroy(h); | |
| 115 bcf_close(fp); | |
| 116 free(str.s); | |
| 117 ks_destroy(ks); | |
| 118 gzclose(fplist); | |
| 119 return 0; | |
| 120 } | |
| 121 | |
| 122 int bcf_main_ld(int argc, char *argv[]) | |
| 123 { | |
| 124 bcf_t *fp; | |
| 125 bcf_hdr_t *h; | |
| 126 bcf1_t **b, *b0; | |
| 127 int i, j, m, n; | |
| 128 double f[4]; | |
| 129 if (argc == 1) { | |
| 130 fprintf(stderr, "Usage: bcftools ld <in.bcf>\n"); | |
| 131 return 1; | |
| 132 } | |
| 133 fp = bcf_open(argv[1], "rb"); | |
| 134 h = bcf_hdr_read(fp); | |
| 135 // read the entire BCF | |
| 136 m = n = 0; b = 0; | |
| 137 b0 = calloc(1, sizeof(bcf1_t)); | |
| 138 while (bcf_read(fp, h, b0) >= 0) { | |
| 139 if (m == n) { | |
| 140 m = m? m<<1 : 16; | |
| 141 b = realloc(b, sizeof(void*) * m); | |
| 142 } | |
| 143 b[n] = calloc(1, sizeof(bcf1_t)); | |
| 144 bcf_cpy(b[n++], b0); | |
| 145 } | |
| 146 bcf_destroy(b0); | |
| 147 // compute pair-wise r^2 | |
| 148 printf("%d\n", n); // the number of loci | |
| 149 for (i = 0; i < n; ++i) { | |
| 150 printf("%s:%d", h->ns[b[i]->tid], b[i]->pos + 1); | |
| 151 for (j = 0; j < i; ++j) { | |
| 152 double r = bcf_pair_freq(b[i], b[j], f); | |
| 153 printf("\t%.3f", r*r); | |
| 154 } | |
| 155 printf("\t1.000\n"); | |
| 156 } | |
| 157 // free | |
| 158 for (i = 0; i < n; ++i) bcf_destroy(b[i]); | |
| 159 free(b); | |
| 160 bcf_hdr_destroy(h); | |
| 161 bcf_close(fp); | |
| 162 return 0; | |
| 163 } | |
| 164 | |
| 165 int main(int argc, char *argv[]) | |
| 166 { | |
| 167 if (argc == 1) { | |
| 168 fprintf(stderr, "\n"); | |
| 169 fprintf(stderr, "Usage: bcftools <command> <arguments>\n\n"); | |
| 170 fprintf(stderr, "Command: view print, extract, convert and call SNPs from BCF\n"); | |
| 171 fprintf(stderr, " index index BCF\n"); | |
| 172 fprintf(stderr, " cat concatenate BCFs\n"); | |
| 173 fprintf(stderr, " ld compute all-pair r^2\n"); | |
| 174 fprintf(stderr, " ldpair compute r^2 between requested pairs\n"); | |
| 175 fprintf(stderr, "\n"); | |
| 176 return 1; | |
| 177 } | |
| 178 if (strcmp(argv[1], "view") == 0) return bcfview(argc-1, argv+1); | |
| 179 else if (strcmp(argv[1], "index") == 0) return bcf_main_index(argc-1, argv+1); | |
| 180 else if (strcmp(argv[1], "ld") == 0) return bcf_main_ld(argc-1, argv+1); | |
| 181 else if (strcmp(argv[1], "ldpair") == 0) return bcf_main_ldpair(argc-1, argv+1); | |
| 182 else if (strcmp(argv[1], "cat") == 0) return bcf_cat(argc-2, argv+2); // cat is different ... | |
| 183 else { | |
| 184 fprintf(stderr, "[main] Unrecognized command.\n"); | |
| 185 return 1; | |
| 186 } | |
| 187 return 0; | |
| 188 } |
