annotate PsiCLASS-1.0.2/samtools-0.1.19/bedcov.c @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 #include <zlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2 #include <stdio.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 #include <ctype.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 #include <stdlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 #include <string.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 #include <unistd.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 #include "kstring.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 #include "bgzf.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 #include "bam.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 #include "kseq.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 KSTREAM_INIT(gzFile, gzread, 16384)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 typedef struct {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 bamFile fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 bam_iter_t iter;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 int min_mapQ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 } aux_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 static int read_bam(void *data, bam1_t *b)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 aux_t *aux = (aux_t*)data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 int ret = bam_iter_read(aux->fp, aux->iter, b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 if ((int)b->core.qual < aux->min_mapQ) b->core.flag |= BAM_FUNMAP;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 return ret;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 int main_bedcov(int argc, char *argv[])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 extern void bam_init_header_hash(bam_header_t*);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 gzFile fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32 kstring_t str;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 kstream_t *ks;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 bam_index_t **idx;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 bam_header_t *h = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 aux_t **aux;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 int *n_plp, dret, i, n, c, min_mapQ = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 int64_t *cnt;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 const bam_pileup1_t **plp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 while ((c = getopt(argc, argv, "Q:")) >= 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 switch (c) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 case 'Q': min_mapQ = atoi(optarg); break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 if (optind + 2 > argc) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 fprintf(stderr, "Usage: samtools bedcov <in.bed> <in1.bam> [...]\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 return 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 memset(&str, 0, sizeof(kstring_t));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 n = argc - optind - 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 aux = calloc(n, sizeof(void*));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 idx = calloc(n, sizeof(void*));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 for (i = 0; i < n; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 aux[i] = calloc(1, sizeof(aux_t));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 aux[i]->min_mapQ = min_mapQ;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 aux[i]->fp = bam_open(argv[i+optind+1], "r");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 idx[i] = bam_index_load(argv[i+optind+1]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 if (aux[i]->fp == 0 || idx[i] == 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 fprintf(stderr, "ERROR: fail to open index BAM file '%s'\n", argv[i+optind+1]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 return 2;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 bgzf_set_cache_size(aux[i]->fp, 20);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 if (i == 0) h = bam_header_read(aux[0]->fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 bam_init_header_hash(h);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 cnt = calloc(n, 8);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 fp = gzopen(argv[optind], "rb");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 ks = ks_init(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 n_plp = calloc(n, sizeof(int));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 plp = calloc(n, sizeof(void*));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 while (ks_getuntil(ks, KS_SEP_LINE, &str, &dret) >= 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 char *p, *q;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 int tid, beg, end, pos;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 bam_mplp_t mplp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 for (p = q = str.s; *p && *p != '\t'; ++p);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 if (*p != '\t') goto bed_error;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 *p = 0; tid = bam_get_tid(h, q); *p = '\t';
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 if (tid < 0) goto bed_error;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 for (q = p = p + 1; isdigit(*p); ++p);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 if (*p != '\t') goto bed_error;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 *p = 0; beg = atoi(q); *p = '\t';
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 for (q = p = p + 1; isdigit(*p); ++p);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 if (*p == '\t' || *p == 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 int c = *p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 *p = 0; end = atoi(q); *p = c;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 } else goto bed_error;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 for (i = 0; i < n; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 if (aux[i]->iter) bam_iter_destroy(aux[i]->iter);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 aux[i]->iter = bam_iter_query(idx[i], tid, beg, end);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 mplp = bam_mplp_init(n, read_bam, (void**)aux);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 bam_mplp_set_maxcnt(mplp, 64000);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 memset(cnt, 0, 8 * n);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 if (pos >= beg && pos < end)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 for (i = 0; i < n; ++i) cnt[i] += n_plp[i];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 for (i = 0; i < n; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 kputc('\t', &str);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 kputl(cnt[i], &str);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 puts(str.s);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 bam_mplp_destroy(mplp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 continue;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 bed_error:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 fprintf(stderr, "Errors in BED line '%s'\n", str.s);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 free(n_plp); free(plp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 ks_destroy(ks);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 gzclose(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 free(cnt);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 for (i = 0; i < n; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 if (aux[i]->iter) bam_iter_destroy(aux[i]->iter);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 bam_index_destroy(idx[i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 bam_close(aux[i]->fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 free(aux[i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 bam_header_destroy(h);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 free(aux); free(idx);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 free(str.s);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 }