Mercurial > repos > vipints > deseq_hts
annotate deseq-hts_2.0/mex/get_bam_properties.cpp @ 11:cec4b4fb30be draft default tip
DEXSeq version 1.6 added
author | vipints <vipin@cbio.mskcc.org> |
---|---|
date | Tue, 08 Oct 2013 08:22:45 -0400 |
parents | 2fe512c7bfdf |
children |
rev | line source |
---|---|
10
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
1 /* |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
2 * This program is free software; you can redistribute it and/or modify |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
3 * it under the terms of the GNU General Public License as published by |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
4 * the Free Software Foundation; either version 3 of the License, or |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
5 * (at your option) any later version. |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
6 * |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
7 * Written (W) 2009-2011 Regina Bohnert |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
8 * Copyright (C) 2009-2011 Max Planck Society |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
9 */ |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
10 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
11 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
12 #include <stdio.h> |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
13 #include <stdlib.h> |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
14 #include <signal.h> |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
15 #include <ctype.h> |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
16 #include <assert.h> |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
17 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
18 #include <vector> |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
19 using std::vector; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
20 #include <string> |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
21 using std::string; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
22 #include <algorithm> |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
23 using std::find; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
24 using std::min; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
25 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
26 #include <mex.h> |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
27 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
28 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
29 char *get_string(const mxArray *prhs); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
30 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
31 typedef unsigned int uint32_t; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
32 typedef unsigned char uint8_t; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
33 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
34 /* |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
35 * [read_len num_reads] = get_bam_properties(fname, path_samtools, contig_name) |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
36 * |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
37 * -- input -- |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
38 * prhs[0] file name of paired reads in BAM format (sorted by read id) |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
39 * prhs[1] path to samtools |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
40 * prhs[2] contig name |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
41 * |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
42 * -- output -- |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
43 * plhs[0] length of read |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
44 * plhs[1] number of unique reads |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
45 */ |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
46 void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
47 // checks for the right number of arguments |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
48 if (nrhs !=3 || nlhs > 2) { |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
49 mexErrMsgTxt("number of input and output args should be 3 and 2\nUSAGE:\n [read_len, num_reads] = get_bam_properties(fname, path_samtools, contig_name)\n"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
50 return; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
51 } |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
52 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
53 signal(SIGCHLD, SIG_IGN); // avoid zombies |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
54 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
55 // read input arguments |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
56 char *fname = get_string(prhs[0]); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
57 char *path_samtools = get_string(prhs[1]); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
58 char *contig_name = get_string(prhs[2]); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
59 char command[10000]; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
60 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
61 sprintf(command, "%s./samtools view %s %s 2>/dev/null", path_samtools, fname, contig_name); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
62 //printf("%s\n", command); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
63 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
64 // get number of unique reads |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
65 int status; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
66 uint32_t num_unique_reads = 0; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
67 char command2[10000]; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
68 sprintf(command2, "%s | cut -f 1 | sort -u | wc -l", command); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
69 FILE* fp = popen(command2, "r"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
70 if (fp == NULL) { |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
71 mexErrMsgTxt("Error using popen\n"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
72 } |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
73 int num_scans = 1; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
74 num_scans = fscanf(fp, "%d", &num_unique_reads); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
75 if (num_scans != 1) { |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
76 rewind(fp); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
77 char ret[1000]; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
78 fgets(ret, 1000, fp); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
79 fprintf(stdout, "%s", ret); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
80 mexErrMsgTxt("Could not determine number of reads\n"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
81 } |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
82 status = pclose(fp); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
83 //printf("%i", num_unique_reads); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
84 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
85 // select reads for given positions and strand |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
86 int num_rows_selected = min((int) num_unique_reads, 100); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
87 sprintf(command, "%s | head -n %i | cut -f 1-11", command, num_rows_selected); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
88 fp = popen(command, "r"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
89 if (fp == NULL) { |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
90 mexErrMsgTxt("Error using popen\n"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
91 } |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
92 /* SAM format |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
93 1: read id, 2: flag, 3: reference name, 4: start (1-based, incl.), 5: mapping quality, |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
94 6: CIGAR, 7: mate reference name, 8: mate start (1-based, incl.), 9: insert size, 10: read, 11: quality |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
95 12+: additional tags |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
96 */ |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
97 uint32_t read_idx = 0, row_idx = 0, num_col = 0; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
98 uint32_t flag = 0, start_pos = 0, map_score = 0, mate_end_pos = 0, num_matches = 0, num_del = 0, num_ins = 0, ins_size = 0; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
99 char ri [1000], read_contig_name [1000], cg [1000], mate_read_id [1000], read [1000], read_qual [1000]; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
100 string last_read_id; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
101 vector<uint32_t> block_lengths, block_starts; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
102 vector<string> read_ids; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
103 vector<string>::iterator it; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
104 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
105 uint32_t read_len = 0; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
106 bool empty_line = true; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
107 int num_rows = 0; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
108 while(empty_line && num_rows < num_rows_selected) { |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
109 num_col = fscanf(fp, "%s\t%i\t%s\t%i\t%i\t%s\t%s\t%i\t%i\t%s\t%s", &ri, &flag, &read_contig_name, &start_pos, &map_score, &cg, &mate_read_id, &mate_end_pos, &ins_size, &read, &read_qual); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
110 if (num_col != 11) { |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
111 mexErrMsgTxt("error reading SAM line\n"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
112 } |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
113 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
114 string cigar = (string) cg; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
115 // ignore lines with reads w/o mapping information |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
116 if (start_pos == 0 || cigar.compare("*")==0) { |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
117 continue; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
118 } |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
119 // parse CIGAR |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
120 uint last_c = 0; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
121 string last_str; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
122 num_matches = 0; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
123 char *end = NULL; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
124 uint32_t tmp_nm = 0, tmp_nd = 0, tmp_ni = 0; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
125 uint32_t last_block_start = 0, last_block_length = 0, last_intron_len = 0; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
126 block_lengths.clear(); block_starts.clear(); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
127 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
128 for (uint c = 0; c < cigar.size(); c++) { |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
129 switch (cigar[c]) { |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
130 case 'M': |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
131 last_str = cigar.substr(last_c, c-last_c); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
132 tmp_nm = strtoul(last_str.c_str(), &end, 10); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
133 if (*end != '\0') |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
134 mexErrMsgTxt("error: number of mismatches\n"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
135 end = NULL; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
136 last_block_length += tmp_nm; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
137 num_matches += tmp_nm; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
138 last_c = c + 1; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
139 break; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
140 case 'I': |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
141 last_str = cigar.substr(last_c, c-last_c); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
142 tmp_ni = strtoul(last_str.c_str(), &end, 10); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
143 if (*end != '\0') |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
144 mexErrMsgTxt("error: number of insertions\n"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
145 end = NULL; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
146 num_ins += tmp_ni; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
147 last_c = c + 1; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
148 break; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
149 case 'D': |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
150 last_str = cigar.substr(last_c, c-last_c); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
151 tmp_nd = strtoul(last_str.c_str(), &end, 10); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
152 if (*end != '\0') |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
153 mexErrMsgTxt("error: number of deletions\n"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
154 end = NULL; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
155 num_del += tmp_nd; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
156 last_block_length += tmp_nd; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
157 last_c = c + 1; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
158 break; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
159 case 'N': |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
160 last_str = cigar.substr(last_c, c-last_c); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
161 last_intron_len = strtoul(last_str.c_str(), &end, 10); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
162 end = NULL; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
163 last_c = c + 1; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
164 break; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
165 case 'S': |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
166 break; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
167 case 'H': |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
168 break; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
169 case 'P': |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
170 break; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
171 default: |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
172 break; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
173 } |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
174 if (cigar[c] == 'N' || c==cigar.size()-1) { |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
175 block_starts.push_back(last_block_start); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
176 last_block_start = last_block_start + last_block_length + last_intron_len; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
177 last_intron_len = 0; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
178 block_lengths.push_back(last_block_length); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
179 last_block_length = 0; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
180 } |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
181 } |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
182 read_len = 0; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
183 for (uint n = 0; n < block_lengths.size(); n++) { |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
184 read_len += block_lengths[n]; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
185 } |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
186 empty_line = false; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
187 } // end of stream parsing |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
188 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
189 status = pclose(fp); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
190 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
191 if (empty_line) |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
192 mexErrMsgTxt("Could not determine read length\n"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
193 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
194 plhs[0] = mxCreateDoubleScalar((double) read_len); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
195 plhs[1] = mxCreateDoubleScalar((double) num_unique_reads); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
196 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
197 return; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
198 } |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
199 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
200 |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
201 char *get_string(const mxArray *prhs) { |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
202 char *buf; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
203 int buflen; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
204 if (!prhs) |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
205 mexErrMsgTxt("get_string called with NULL pointer arg"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
206 if (!mxIsChar(prhs)) |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
207 mexErrMsgTxt("input is not a string"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
208 if (mxGetM(prhs) != 1) |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
209 mexErrMsgTxt("input is not a row vector"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
210 buflen = mxGetN(prhs) + 1; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
211 buf = (char*) malloc(buflen); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
212 /* copy the string from prhs into buf and add terminating NULL char */ |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
213 if (mxGetString(prhs, buf, buflen)) |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
214 mexErrMsgTxt("not enough space"); |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
215 return buf; |
2fe512c7bfdf
DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff
changeset
|
216 } |