0
|
1 #include <unistd.h>
|
|
2 #include <assert.h>
|
|
3 #include "bam.h"
|
|
4
|
|
5 typedef struct {
|
|
6 long long n_reads, n_mapped, n_pair_all, n_pair_map, n_pair_good;
|
|
7 long long n_sgltn, n_read1, n_read2;
|
|
8 long long n_qcfail, n_dup;
|
|
9 long long n_diffchr, n_diffhigh;
|
|
10 } bam_flagstat_t;
|
|
11
|
|
12 #define flagstat_loop(s, c) do { \
|
|
13 ++(s)->n_reads; \
|
|
14 if ((c)->flag & BAM_FPAIRED) { \
|
|
15 ++(s)->n_pair_all; \
|
|
16 if ((c)->flag & BAM_FPROPER_PAIR) ++(s)->n_pair_good; \
|
|
17 if ((c)->flag & BAM_FREAD1) ++(s)->n_read1; \
|
|
18 if ((c)->flag & BAM_FREAD2) ++(s)->n_read2; \
|
|
19 if (((c)->flag & BAM_FMUNMAP) && !((c)->flag & BAM_FUNMAP)) ++(s)->n_sgltn; \
|
|
20 if (!((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FMUNMAP)) { \
|
|
21 ++(s)->n_pair_map; \
|
|
22 if ((c)->mtid != (c)->tid) { \
|
|
23 ++(s)->n_diffchr; \
|
|
24 if ((c)->qual >= 5) ++(s)->n_diffhigh; \
|
|
25 } \
|
|
26 } \
|
|
27 } \
|
|
28 if (!((c)->flag & BAM_FUNMAP)) ++(s)->n_mapped; \
|
|
29 if ((c)->flag & BAM_FQCFAIL) ++(s)->n_qcfail; \
|
|
30 if ((c)->flag & BAM_FDUP) ++(s)->n_dup; \
|
|
31 } while (0)
|
|
32
|
|
33 bam_flagstat_t *bam_flagstat_core(bamFile fp)
|
|
34 {
|
|
35 bam_flagstat_t *s;
|
|
36 bam1_t *b;
|
|
37 bam1_core_t *c;
|
|
38 int ret;
|
|
39 s = (bam_flagstat_t*)calloc(1, sizeof(bam_flagstat_t));
|
|
40 b = bam_init1();
|
|
41 c = &b->core;
|
|
42 while ((ret = bam_read1(fp, b)) >= 0)
|
|
43 flagstat_loop(s, c);
|
|
44 bam_destroy1(b);
|
|
45 if (ret != -1)
|
|
46 fprintf(stderr, "[bam_flagstat_core] Truncated file? Continue anyway.\n");
|
|
47 return s;
|
|
48 }
|
|
49 int bam_flagstat(int argc, char *argv[])
|
|
50 {
|
|
51 bamFile fp;
|
|
52 bam_header_t *header;
|
|
53 bam_flagstat_t *s;
|
|
54 if (argc == optind) {
|
|
55 fprintf(stderr, "Usage: samtools flagstat <in.bam>\n");
|
|
56 return 1;
|
|
57 }
|
|
58 fp = strcmp(argv[optind], "-")? bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r");
|
|
59 assert(fp);
|
|
60 header = bam_header_read(fp);
|
|
61 s = bam_flagstat_core(fp);
|
|
62 printf("%lld in total\n", s->n_reads);
|
|
63 printf("%lld QC failure\n", s->n_qcfail);
|
|
64 printf("%lld duplicates\n", s->n_dup);
|
|
65 printf("%lld mapped (%.2f%%)\n", s->n_mapped, (float)s->n_mapped / s->n_reads * 100.0);
|
|
66 printf("%lld paired in sequencing\n", s->n_pair_all);
|
|
67 printf("%lld read1\n", s->n_read1);
|
|
68 printf("%lld read2\n", s->n_read2);
|
|
69 printf("%lld properly paired (%.2f%%)\n", s->n_pair_good, (float)s->n_pair_good / s->n_pair_all * 100.0);
|
|
70 printf("%lld with itself and mate mapped\n", s->n_pair_map);
|
|
71 printf("%lld singletons (%.2f%%)\n", s->n_sgltn, (float)s->n_sgltn / s->n_pair_all * 100.0);
|
|
72 printf("%lld with mate mapped to a different chr\n", s->n_diffchr);
|
|
73 printf("%lld with mate mapped to a different chr (mapQ>=5)\n", s->n_diffhigh);
|
|
74 free(s);
|
|
75 bam_header_destroy(header);
|
|
76 bam_close(fp);
|
|
77 return 0;
|
|
78 }
|