annotate deseq-hts_1.0/mex/get_reads_direct.cpp @ 0:94a108763d9e draft

deseq-hts version 1.0 wraps the DESeq 1.6.0
author vipints
date Wed, 09 May 2012 20:43:47 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
1 /*
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
2 * This program is free software; you can redistribute it and/or modify
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
3 * it under the terms of the GNU General Public License as published by
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
4 * the Free Software Foundation; either version 3 of the License, or
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
5 * (at your option) any later version.
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
6 *
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
7 * Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
8 * Copyright (C) 2010-2011 Max Planck Society
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
9 */
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
10
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
11
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
12 #include <stdio.h>
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
13 #include <assert.h>
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
14 #include "sam.h"
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
15 #include "get_reads_direct.h"
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
16
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
17 #include <vector>
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
18 using std::vector;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
19 #include <string>
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
20 using std::string;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
21
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
22 #ifdef __cplusplus
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
23 extern "C" {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
24 #endif
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
25
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
26 typedef struct {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
27 int beg, end;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
28 samfile_t *in;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
29 } tmpstruct_t;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
30
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
31 typedef struct {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
32 uint64_t u, v;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
33 } pair64_t;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
34
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
35 static inline int is_overlap(uint32_t beg, uint32_t end, const bam1_t *b)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
36 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
37 uint32_t rbeg = b->core.pos;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
38 uint32_t rend = b->core.n_cigar? bam_calend(&b->core, bam1_cigar(b)) : b->core.pos + 1;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
39 return (rend > beg && rbeg < end);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
40 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
41
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
42 pair64_t * get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int end, int* cnt_off);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
43
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
44 int bam_fetch_reads(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_header_t* header, vector<CRead*>* reads, char strand);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
45
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
46 // callback for bam_plbuf_init()
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
47 static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
48 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
49 //tmpstruct_t *tmp = (tmpstruct_t*)data;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
50 //if ((int)pos >= tmp->beg && (int)pos < tmp->end)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
51 // printf("%s\t%d\t%d\n", tmp->in->header->target_name[tid], pos + 1, n);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
52 return 0;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
53 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
54 #ifdef __cplusplus
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
55 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
56 #endif
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
57 int parse_sam_line(char* line, CRead* read);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
58 //int set_strand(char c);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
59 //void parse_cigar(bam1_t* b, CRead* read);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
60 void parse_cigar(bam1_t* b, CRead* read, bam_header_t* header);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
61
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
62
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
63 int get_reads_from_bam(char* filename, char* region, vector<CRead*>* reads, char strand, int lsubsample)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
64 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
65 subsample = lsubsample;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
66 //set_strand(strand);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
67
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
68 srand (time(NULL));
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
69 //srand (1234);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
70 tmpstruct_t tmp;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
71 tmp.in = samopen(filename, "rb", 0);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
72 if (tmp.in == 0) {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
73 fprintf(stderr, "Fail to open BAM file %s\n", filename);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
74 return 1;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
75 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
76 int ref;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
77 bam_index_t *idx;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
78 bam_plbuf_t *buf;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
79 idx = bam_index_load(filename); // load BAM index
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
80 if (idx == 0) {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
81 fprintf(stderr, "BAM indexing file is not available.\n");
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
82 return 1;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
83 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
84 bam_parse_region(tmp.in->header, region, &ref,
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
85 &tmp.beg, &tmp.end); // parse the region
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
86 if (ref < 0) {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
87 fprintf(stderr, "Invalid region %s\n", region);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
88 return 1;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
89 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
90
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
91 buf = bam_plbuf_init(pileup_func, &tmp); // initialize pileup
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
92
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
93 bam_fetch_reads(tmp.in->x.bam, idx, ref, tmp.beg, tmp.end, buf, tmp.in->header, reads, strand);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
94 //fprintf(stdout, "intron_list: %d \n", intron_list->size());
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
95
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
96 bam_plbuf_push(0, buf); // finalize pileup
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
97 bam_index_destroy(idx);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
98 bam_plbuf_destroy(buf);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
99 samclose(tmp.in);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
100 return 0;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
101 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
102
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
103
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
104 int bam_fetch_reads(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_header_t* header, vector<CRead*>* reads, char strand)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
105 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
106 int n_off;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
107 pair64_t *off = get_chunk_coordinates(idx, tid, beg, end, &n_off);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
108 if (off == 0) return 0;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
109 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
110 // retrive alignments
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
111 uint64_t curr_off;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
112 int i, ret, n_seeks;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
113 n_seeks = 0; i = -1; curr_off = 0;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
114 bam1_t *b = (bam1_t*)calloc(1, sizeof(bam1_t));
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
115 for (;;) {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
116 if (curr_off == 0 || curr_off >= off[i].v) { // then jump to the next chunk
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
117 if (i == n_off - 1) break; // no more chunks
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
118 if (i >= 0) assert(curr_off == off[i].v); // otherwise bug
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
119 if (i < 0 || off[i].v != off[i+1].u) { // not adjacent chunks; then seek
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
120 bam_seek(fp, off[i+1].u, SEEK_SET);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
121 curr_off = bam_tell(fp);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
122 ++n_seeks;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
123 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
124 ++i;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
125 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
126 if ((ret = bam_read1(fp, b)) > 0) {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
127 curr_off = bam_tell(fp);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
128 if (b->core.tid != tid || b->core.pos >= end) break; // no need to proceed
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
129 else if (is_overlap(beg, end, b))
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
130 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
131 int rr = rand();
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
132 if ((rr%1000 < subsample))
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
133 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
134 CRead* read = new CRead();
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
135 parse_cigar(b, read, header);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
136
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
137 if (strand == '0' || strand==read->strand[0] || read->strand[0]=='0')
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
138 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
139 read->left = (b->core.flag & left_flag_mask) >0;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
140 read->right = (b->core.flag & right_flag_mask) >0;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
141 read->reverse = (b->core.flag & reverse_flag_mask) >0;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
142 reads->push_back(read);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
143 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
144 else
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
145 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
146 delete read;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
147 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
148 //else if (read->strand[0]=='0'&&((b->core.flag & g_flag_off) >0))
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
149 //{
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
150 // //fprintf(stdout, "(-)-strand; read->strand[0]==0, num_exons: %i \n", read->block_starts.size());
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
151 // // this flag means that the read has been reversed for alignment
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
152 // // flag bit set and (-)-strand requested
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
153 // reads->push_back(read);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
154 //}
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
155 //else if (read->strand[0]=='0'&&(g_flag_on>0&&(b->core.flag & g_flag_on)==0))
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
156 //{
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
157 // //fprintf(stdout, "(+)-strand; read->strand[0]==0, num_exons: %i \n", read->block_starts.size());
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
158 // // (+)-strand requested and flag bit not set
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
159 // reads->push_back(read);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
160 //}
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
161 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
162 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
163 } else break; // end of file
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
164 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
165 // fprintf(stderr, "[bam_fetch] # seek calls: %d\n", n_seeks);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
166 bam_destroy1(b);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
167 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
168 free(off);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
169 return 0;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
170 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
171
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
172 void parse_cigar(bam1_t* b, CRead* read, bam_header_t* header)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
173 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
174 read->start_pos = b->core.pos+1;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
175 read->set_strand('0');
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
176 read->read_id = new char[100];
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
177 sprintf(read->read_id, "%s\0", bam1_qname(b));
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
178
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
179 for (int k = 0; k < b->core.n_cigar; ++k)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
180 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
181 int op = bam1_cigar(b)[k] & BAM_CIGAR_MASK; // operation
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
182 int l = bam1_cigar(b)[k] >> BAM_CIGAR_SHIFT; // length
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
183 //fprintf(stdout, "op:%d l:%d\n", op, l);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
184 if (op == BAM_CMATCH)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
185 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
186 if (k==0)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
187 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
188 read->block_lengths.push_back(l);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
189 read->block_starts.push_back(0);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
190 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
191 else
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
192 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
193 int op_prev = bam1_cigar(b)[k-1] & BAM_CIGAR_MASK;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
194 int l_prev = bam1_cigar(b)[k-1] >> BAM_CIGAR_SHIFT;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
195 if (op_prev==BAM_CREF_SKIP)// intron before
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
196 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
197 if (read->block_lengths.size()>=1)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
198 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
199 int last_block_start = (*(read->block_starts.end()-1));
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
200 int intron_start = last_block_start+(*(read->block_lengths.end()-1));
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
201 read->block_lengths.push_back(l);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
202 read->block_starts.push_back(intron_start+l_prev);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
203 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
204 else
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
205 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
206 // start of first block was not a match
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
207 read->block_lengths.push_back(l);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
208 read->block_starts.push_back(0);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
209 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
210 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
211 else
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
212 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
213 if (read->block_lengths.size()>=1 && op == BAM_CDEL)// if it is an insertion then the matching block is not inreased
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
214 (*(read->block_lengths.end()-1))+=l;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
215 else
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
216 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
217 //char *samline = bam_format1(header, b);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
218 //printf("header: %s \n", samline);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
219 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
220 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
221 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
222 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
223 else if (op == BAM_CDEL)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
224 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
225 if (k>0 && read->block_lengths.size()>=1)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
226 (*(read->block_lengths.end()-1))+=l;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
227 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
228 else if (op == BAM_CREF_SKIP)//intron
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
229 {}
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
230 else if (op == BAM_CINS || op == BAM_CSOFT_CLIP)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
231 {}
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
232 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
233 // parse auxiliary data
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
234 uint8_t* s = bam1_aux(b);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
235 uint8_t* end = b->data + b->data_len;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
236 while (s < end)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
237 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
238 uint8_t type, key[2];
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
239 key[0] = s[0]; key[1] = s[1];
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
240 s += 2; type = *s; ++s;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
241 //fprintf(stdout, "\n%c%c:%c\n", key[0], key[1], type);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
242 if (type == 'A')
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
243 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
244 if ( key[0] =='X' && key[1] == 'S')
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
245 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
246 read->set_strand((char) *s);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
247 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
248 ++s;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
249 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
250 else if (type == 'C')
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
251 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
252 if ( key[0] =='H' && key[1] == '0')
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
253 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
254 uint8_t matches = *s;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
255 read->matches = (int) matches;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
256 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
257 if ( key[0] =='N' && key[1] == 'M')
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
258 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
259 uint8_t mismatches = *s;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
260 read->mismatches = (int) mismatches;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
261 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
262 if ( key[0] =='H' && key[1] == 'I')
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
263 {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
264 uint8_t mai = *s;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
265 read->multiple_alignment_index = (int) mai;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
266 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
267
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
268 ++s;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
269 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
270 else if (type == 'c') { ++s; }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
271 else if (type == 'S') { s += 2; }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
272 else if (type == 's') { s += 2; }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
273 else if (type == 'I') { s += 4; }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
274 else if (type == 'i') { s += 4; }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
275 else if (type == 'f') { s += 4; }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
276 else if (type == 'd') { s += 8; }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
277 else if (type == 'Z') { ++s; }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
278 else if (type == 'H') { ++s; }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
279 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
280 }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
281
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
282 //int set_strand(char c)
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
283 //{
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
284 // if (c=='+')
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
285 // {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
286 // char* fl = (char*) "0x0010";
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
287 // g_flag_on = strtol(fl, 0, 0);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
288 // g_flag_off = 0;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
289 // }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
290 // else if (c=='-')
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
291 // {
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
292 // char* fl = (char*) "0x0010";
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
293 // g_flag_off = strtol(fl, 0, 0);
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
294 // g_flag_on = 0;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
295 // }
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
296 // return 0;
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
297 //}
94a108763d9e deseq-hts version 1.0 wraps the DESeq 1.6.0
vipints
parents:
diff changeset
298