annotate src/splicetrap.estimate.cpp @ 5:2ebca9da5e42 draft default tip

planemo upload
author bioitcore
date Thu, 07 Sep 2017 17:39:24 -0400
parents adc0f7765d85
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
1 //Author: Jie Wu@CSHL
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
2 //TO replace the original Perl code
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
3
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
4 #include <stdio.h>
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
5 #include <stdlib.h>
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
6 #include <iostream>
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
7 #include <fstream>
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
8 #include <string.h>
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
9 #include <unistd.h>
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
10 #include <sstream>
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
11 #include <map>
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
12 #include <vector>
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
13 #include <math.h>
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
14 #include <time.h>
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
15 #include <limits>
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
16 #include <iomanip>
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
17
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
18 using namespace std;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
19
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
20 int MAX_LINE_LEN = 1024*1024;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
21
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
22 void printusage()
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
23 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
24 cout<< "\tUsage:"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
25 cout<<"\t-s\tread_size"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
26 cout<<"\t-b\tIRM file"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
27 cout<<"\t-f\tFZM file"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
28 cout<<"\t-1\tmapping result 1"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
29 cout<<"\t-2\tmapping result 2"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
30 cout<<"\t-d\tTXdb database file"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
31 cout<<"\t-o\toutput prefix"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
32
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
33 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
34
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
35 struct ReadPairInfo
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
36 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
37 bool isPaired;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
38 vector<int> IsoFlag; //flag as 1 if mapped to the isoform
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
39 vector<int> Size; //fragment size in the isoform
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
40 vector<int> Pos;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
41 };
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
42
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
43 class TXdb_entry{
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
44 public:
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
45 string chrid;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
46 //int start;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
47 //int end;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
48 string id;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
49 bool iscomplete;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
50 int start;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
51 int end;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
52 int exonstarts[3];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
53 char strand;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
54 int exonsize[3];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
55 int len1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
56 int len2;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
57 vector< ReadPairInfo > pairs;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
58 int snum1; //single-end reads
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
59 int snum2;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
60 int snum12;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
61 float ir, bir;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
62 int enum1; //exon body reads
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
63 int enum2;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
64 int enum3;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
65
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
66 int jnum12;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
67 int jnum23;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
68 int jnum13;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
69
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
70 int totalreadnum;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
71 void estimate(vector<double> &FZM, map<string, vector<double> > &IRM, int &read_size)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
72 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
73 float Maxe=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
74 float Max = -numeric_limits<float>::max();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
75 float BMaxe=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
76 float BMax=-numeric_limits<float>::max();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
77 for(float e1=0.001; e1<1;e1=e1+0.001)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
78 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
79 float efix1 = e1*len1/
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
80 (e1*exonsize[1]+len2);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
81 float efix2 = 1-efix1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
82 float LL = snum12*log( efix1/(len1-read_size+1)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
83 + efix2/(len2-read_size+1) )
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
84 + snum1*log(efix1/(len1-read_size+1))
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
85 + snum2*log(efix2/(len2-read_size+1));
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
86 //cout<<LL<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
87 //cout<<efix1<<"\t" <<pairs.size()<<"\t" <<len2<<"\t" <<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
88 //int num1=0, num2=0,num12=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
89 for(int i=0;i<pairs.size();i++)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
90 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
91 if(pairs[i].IsoFlag[0] == 1)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
92 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
93 if(pairs[i].IsoFlag[1] == 0)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
94 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
95 // num1++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
96 float tmp=FZM[pairs[i].Size[0]]*efix1/(len1-pairs[i].Size[0]+1);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
97 if (tmp == 0)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
98 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
99 LL = LL - 308;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
100 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
101 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
102 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
103 LL = LL + log(tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
104 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
105 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
106 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
107 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
108 // num12++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
109 float tmp=(FZM[pairs[i].Size[0]]*efix1/(len1-pairs[i].Size[0]+1))
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
110 + (FZM[pairs[i].Size[1]]*efix2/(len2-pairs[i].Size[1]+1));
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
111 if(tmp ==0)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
112 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
113 LL = LL -308;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
114 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
115 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
116 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
117 LL = LL + log(tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
118 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
119 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
120 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
121 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
122 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
123 //num2++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
124 float tmp = FZM[pairs[i].Size[1]]*efix2/(len2-pairs[i].Size[1]+1);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
125 if(tmp ==0)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
126 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
127 LL= LL - 308;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
128 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
129 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
130 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
131 LL = LL +log(tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
132 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
133 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
134 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
135 //cout<<num1<<"\t"<<num2<<"\t"<<num12<<"\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
136 //cout<<LL<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
137
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
138 if(IRM["CA"].size() > 0)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
139 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
140 float BLL;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
141 string eventtype =id.substr(0,2);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
142 if(!eventtype.compare("ME"))
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
143 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
144 eventtype = "CA";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
145 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
146 BLL = LL + log(IRM[eventtype][int(e1/0.001)]);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
147 if(BLL > BMax)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
148 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
149 BMax = BLL;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
150 BMaxe = e1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
151 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
152 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
153 //cout<<LL<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
154 if(LL > Max)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
155 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
156 Max= LL;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
157 Maxe = e1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
158
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
159 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
160
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
161 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
162 ir= Maxe;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
163 bir= BMaxe;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
164 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
165
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
166 TXdb_entry(char *line)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
167 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
168 size_t found;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
169 string linestr(line);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
170 found = linestr.find("[L]");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
171 if(found != string::npos)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
172 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
173 iscomplete=1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
174
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
175 char *token;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
176
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
177 token = strtok(line,"\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
178 if(token==NULL)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
179 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
180 cout<<"error1"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
181 exit(0);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
182 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
183 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
184 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
185 chrid = token;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
186 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
187 token = strtok(NULL,"\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
188 if(token==NULL)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
189 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
190 cout<<"error2"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
191 exit(0);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
192 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
193 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
194 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
195 sscanf(token,"%d",&start);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
196 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
197 token = strtok(NULL,"\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
198 if(token==NULL)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
199 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
200 cout<<"error3"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
201 exit(0);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
202 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
203 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
204 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
205 sscanf(token,"%d",&end);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
206 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
207 token = strtok(NULL,"\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
208 if(token==NULL)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
209 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
210 cout<<"error4"<<endl;exit(0);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
211 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
212 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
213 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
214 id = token;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
215 id = id.substr(0,id.size()-3);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
216 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
217 token = strtok(NULL,"\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
218 token = strtok(NULL,"\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
219 if(token==NULL)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
220 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
221 cout<<"error5"<<endl;exit(0);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
222 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
223 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
224 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
225 sscanf(token,"%c",&strand);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
226 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
227 for(int i=0;i<5;i++)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
228 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
229 token=strtok(NULL,"\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
230 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
231 if(token==NULL){
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
232 cout<<"error6"<<endl;exit(0);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
233 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
234 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
235 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
236 string str_tmp(token);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
237 size_t found_tmp;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
238 int *pexonsize = exonsize;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
239 while((found_tmp = str_tmp.find(",")) != string::npos){
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
240 sscanf((str_tmp.substr(0,found_tmp)).c_str(),"%d",pexonsize);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
241 pexonsize++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
242 str_tmp.erase(0,found_tmp+1);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
243 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
244 sscanf(str_tmp.c_str(),"%d",pexonsize);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
245
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
246 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
247 token=strtok(NULL,"\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
248
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
249 if(token==NULL){
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
250 cout<<"error7"<<endl;exit(0);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
251 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
252 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
253 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
254 string str_tmp(token);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
255 size_t found_tmp;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
256 int *pexonstarts = exonstarts;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
257 while((found_tmp = str_tmp.find(",")) != string::npos){
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
258 sscanf((str_tmp.substr(0,found_tmp)).c_str(),"%d",pexonstarts);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
259 pexonstarts++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
260 str_tmp.erase(0,found_tmp+1);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
261 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
262 sscanf(str_tmp.c_str(),"%d",pexonstarts);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
263
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
264 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
265 len1 = exonsize[0]+exonsize[1]+exonsize[2];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
266 len2 = exonsize[0]+exonsize[2];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
267 snum12=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
268 snum1 =0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
269 snum2 =0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
270 enum1=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
271 enum2=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
272 enum3=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
273 jnum12=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
274 jnum13=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
275 jnum23=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
276 totalreadnum=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
277 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
278 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
279 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
280 iscomplete = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
281 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
282 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
283 };
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
284
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
285 class ReadInfo
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
286 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
287 public:
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
288 bool isUseful;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
289 char *pline;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
290 void scan(map<string, vector<int> > &map_pos )
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
291 //eventname, pos in L/S iso
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
292 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
293 char *token;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
294 token = strtok(pline, "\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
295 token = strtok(NULL, "\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
296 token = strtok(NULL, "\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
297 token = strtok(NULL, "\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
298 string mapinfo(token);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
299 size_t beg, end;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
300 end = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
301 while( (beg= mapinfo.find_first_of("/", end))!=string::npos)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
302 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
303 beg ++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
304 end = mapinfo.find_first_of("[",beg);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
305 string eventid = mapinfo.substr( beg, end-beg ) ;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
306 vector<int> pos(2,-1);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
307 if(!map_pos.count(eventid))
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
308 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
309 map_pos.insert(pair<string, vector<int> >
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
310 (eventid, pos) );
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
311 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
312 end ++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
313 string LorS = mapinfo.substr( end,1 );
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
314 beg = mapinfo.find_first_of(":",end );
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
315 beg ++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
316 end = mapinfo.find_first_of(",",beg );
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
317 string pos_str(mapinfo.substr(beg, end-2-beg));
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
318
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
319 if(LorS.compare("L") == 0)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
320 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
321 sscanf(pos_str.c_str(),"%d",&(map_pos[eventid][0]) );
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
322 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
323 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
324 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
325 sscanf(pos_str.c_str(),"%d",&(map_pos[eventid][1]) );
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
326 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
327
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
328 //cout<<eventid<<"\t"<<map_pos[eventid][0]<<"\t"<<map_pos[eventid][1]<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
329 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
330
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
331 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
332 ReadInfo(char *line)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
333 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
334 string line_str(line);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
335 string line_str_last2=line_str.substr(line_str.size()-2,2);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
336 if(line_str_last2.compare("NM") == 0 )
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
337 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
338 isUseful = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
339 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
340 else if(line_str_last2.compare("MT") == 0 )
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
341 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
342 isUseful = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
343 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
344 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
345 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
346 isUseful = 1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
347 pline = line;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
348 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
349 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
350 };
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
351
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
352
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
353
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
354 class ReadPair
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
355 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
356 public:
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
357 map<string, ReadPairInfo> mappinginfo;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
358 ReadPair(map<string, vector<int> > &read1_mappos, map<string, vector<int> > &read2_mappos, int &read_size)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
359 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
360 map<string, vector<int> >::iterator it1 = read1_mappos.begin();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
361 for(;it1!=read1_mappos.end();it1++)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
362 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
363 ReadPairInfo ReadPairInfo_tmp;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
364 ReadPairInfo_tmp.IsoFlag.resize(2);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
365 ReadPairInfo_tmp.Size.resize(2);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
366 //ReadPairInfo_tmp.Pos.resize(4);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
367 ReadPairInfo_tmp.Pos.assign(4,-1);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
368 string eventid = it1->first;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
369 if(read2_mappos.count(eventid) > 0)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
370 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
371 ReadPairInfo_tmp.isPaired = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
372 for(int i=0;i<2;i++)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
373 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
374 //cout<<read1_mappos[eventid][i]<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
375 if(read2_mappos[eventid][i] == -1 || read1_mappos[eventid][i] == -1)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
376 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
377 ReadPairInfo_tmp.IsoFlag[i] = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
378 ReadPairInfo_tmp.Size[i] = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
379 //discard reads like this
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
380 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
381 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
382 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
383
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
384 ReadPairInfo_tmp.isPaired = 1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
385 ReadPairInfo_tmp.IsoFlag[i] = 1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
386 ReadPairInfo_tmp.Size[i] = abs(read2_mappos[eventid][i]
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
387 -read1_mappos[eventid][i])
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
388 +read_size;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
389 ReadPairInfo_tmp.Pos[2*i] = read1_mappos[eventid][i];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
390 ReadPairInfo_tmp.Pos[2*i+1] = read2_mappos[eventid][i];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
391 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
392 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
393 mappinginfo.insert (pair<string, ReadPairInfo>(eventid, ReadPairInfo_tmp));
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
394 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
395 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
396 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
397 ReadPairInfo_tmp.isPaired = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
398 for(int i=0;i<2;i++)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
399 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
400 ReadPairInfo_tmp.Pos[2*i] = read1_mappos[eventid][i];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
401 ReadPairInfo_tmp.Size[i] = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
402 if(read1_mappos[eventid][i] == -1)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
403 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
404 ReadPairInfo_tmp.IsoFlag[i] = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
405
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
406 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
407 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
408 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
409 ReadPairInfo_tmp.IsoFlag[i] = 1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
410 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
411 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
412 mappinginfo.insert (pair<string, ReadPairInfo>(eventid, ReadPairInfo_tmp));
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
413 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
414 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
415 map<string, vector<int> >::iterator it2 = read2_mappos.begin();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
416 for(;it2!=read2_mappos.end();it2++)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
417 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
418 string eventid = it2->first;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
419 if(mappinginfo.count(eventid)>0)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
420 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
421 continue;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
422 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
423 ReadPairInfo ReadPairInfo_tmp;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
424 ReadPairInfo_tmp.IsoFlag.resize(2);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
425 ReadPairInfo_tmp.Size.resize(2);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
426 //ReadPairInfo_tmp.Pos.resize(4);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
427 ReadPairInfo_tmp.Pos.assign(4,-1);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
428 if(read1_mappos.count(eventid) > 0)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
429 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
430 ReadPairInfo_tmp.isPaired = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
431 for(int i=0;i<2;i++)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
432 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
433 if(read2_mappos[eventid][i] == -1 || read1_mappos[eventid][i] == -1)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
434 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
435 ReadPairInfo_tmp.IsoFlag[i] = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
436 ReadPairInfo_tmp.Size[i] = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
437 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
438 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
439 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
440 ReadPairInfo_tmp.isPaired = 1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
441 ReadPairInfo_tmp.IsoFlag[i] = 1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
442 ReadPairInfo_tmp.Size[i] = abs(read2_mappos[eventid][i]
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
443 -read1_mappos[eventid][i]
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
444 +read_size);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
445 ReadPairInfo_tmp.Pos[2*i] = read1_mappos[eventid][i];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
446 ReadPairInfo_tmp.Pos[2*i+1]=read2_mappos[eventid][i];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
447 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
448 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
449 mappinginfo.insert (pair<string, ReadPairInfo>(eventid, ReadPairInfo_tmp));
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
450 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
451 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
452 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
453 ReadPairInfo_tmp.isPaired = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
454 for(int i=0;i<2;i++)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
455 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
456 ReadPairInfo_tmp.Pos[2*i+1]=read2_mappos[eventid][i];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
457 ReadPairInfo_tmp.Size[i] = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
458 if(read2_mappos[eventid][i] == -1)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
459 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
460 ReadPairInfo_tmp.IsoFlag[i] = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
461 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
462 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
463 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
464 ReadPairInfo_tmp.IsoFlag[i] = 1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
465 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
466 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
467 mappinginfo.insert (pair<string, ReadPairInfo>(eventid, ReadPairInfo_tmp));
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
468 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
469
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
470 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
471 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
472 };
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
473
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
474 int main (int argc, char **argv) {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
475 int next_option;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
476 int digit_optind = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
477 int read_size = 36;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
478 char *dbfile_name = 0,
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
479 *IRM_file_name = 0,
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
480 *FZM_file_name = 0,
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
481 *output_file_prefix = 0,
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
482 *read1_nomt_file_name = 0,
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
483 *read2_nomt_file_name = 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
484
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
485 while ( (next_option = getopt(argc, argv, "s:d:b:f:o:1:2:")) != -1) {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
486 int this_option_optind = optind ? optind : 1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
487 switch (next_option) {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
488 case 's': sscanf (optarg,"%d",&read_size); break;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
489 case 'd': dbfile_name = optarg; break;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
490 case 'b': IRM_file_name = optarg; break;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
491 case 'f': FZM_file_name = optarg; break;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
492 case 'o': output_file_prefix = optarg; break;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
493 case '1': read1_nomt_file_name = optarg; break;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
494 case '2': read2_nomt_file_name = optarg; break;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
495 default: break;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
496 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
497 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
498 if (!read_size||
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
499 !dbfile_name||
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
500 !FZM_file_name||
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
501 !output_file_prefix||
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
502 !read1_nomt_file_name||
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
503 !read2_nomt_file_name)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
504 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
505 printusage();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
506 return 0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
507 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
508 clock_t clock_start, clock_finish;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
509 clock_start=clock();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
510 string ratio_file_name = output_file_prefix;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
511 ratio_file_name.append(".ratio");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
512 string log_file_name = output_file_prefix;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
513 log_file_name.append(".log");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
514 string num_file_name = output_file_prefix;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
515 num_file_name.append(".nums");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
516
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
517 ofstream log_file(log_file_name.c_str());
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
518 cout<<"Log file name:\t"<<log_file_name<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
519 cout<<"Parameters:"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
520 cout<<"Read size:\t"<<read_size<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
521 log_file<<"#read_size:\t"<<read_size<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
522 cout<<"Database:\t"<<dbfile_name<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
523 log_file<<"#dbfile_name:\t"<<dbfile_name<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
524 cout<<"Fragment size file name:\t"<<FZM_file_name<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
525 log_file<<"#FZM_file_name:\t"<<FZM_file_name<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
526 cout<<"Output file prefix:\t"<<output_file_prefix<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
527 cout<<"Read end1 file:\t"<<read1_nomt_file_name<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
528 cout<<"Read end2 file:\t"<<read2_nomt_file_name<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
529 log_file<<"#read1_nomt_file_name:\t"<<read1_nomt_file_name<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
530 log_file<<"#read2_nomt_file_name:\t"<<read2_nomt_file_name<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
531 if(IRM_file_name)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
532 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
533 cout<<"Inclusion ratio distn. file:\t"<<IRM_file_name<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
534 log_file<<"#IRM_file_name:\t"<<IRM_file_name<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
535
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
536 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
537 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
538 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
539 cout<<"Inclusion ratio distn. file:\tNA"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
540 log_file<<"#IRM_file_name:\tNA"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
541 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
542
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
543 //start reading files, dbfile first
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
544 cout<<"Reading database file..."<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
545 map<string, TXdb_entry> TXdb_entries;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
546 ifstream dbfile(dbfile_name);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
547 char dbfile_line[MAX_LINE_LEN];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
548 while(dbfile.getline(dbfile_line,MAX_LINE_LEN)){
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
549 TXdb_entry entry_tmp(dbfile_line);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
550 if(entry_tmp.iscomplete)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
551 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
552 TXdb_entries.insert(pair<string, TXdb_entry>
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
553 (entry_tmp.id,entry_tmp)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
554 );
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
555 //cout<<entry_tmp.exonsize[1]<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
556 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
557
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
558 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
559 dbfile.close();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
560
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
561 //start reading bayes prior if there is
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
562 map<string, vector< double > > IRM;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
563 IRM.insert(pair<string, vector<double> > ("CA", vector<double>()));
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
564 IRM.insert(pair<string, vector<double> > ("CS", vector<double>()));
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
565 IRM.insert(pair<string, vector<double> > ("IR", vector<double>()));
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
566 IRM.insert(pair<string, vector<double> > ("AA", vector<double>()));
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
567 IRM.insert(pair<string, vector<double> > ("AD", vector<double>()));
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
568 //IRM.insert(pair<string, vector<double> > ("AI", vector<double>()));
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
569 if(IRM_file_name)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
570 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
571 cout<<"Reading inclusion ratio distn. file..."<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
572 ifstream IRM_file(IRM_file_name);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
573 char IRM_line[MAX_LINE_LEN];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
574 IRM_file.getline(IRM_line, MAX_LINE_LEN);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
575 while(IRM_file.getline(IRM_line, MAX_LINE_LEN))
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
576 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
577 char *token;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
578 token = strtok(IRM_line, "\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
579 if (token != NULL) {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
580 double hist_tmp;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
581 sscanf(token,"%lf",&hist_tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
582 IRM["CA"].push_back(hist_tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
583 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
584 token = strtok(NULL,"\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
585 if (token != NULL) {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
586 double hist_tmp;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
587 sscanf(token,"%lf",&hist_tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
588 IRM["AD"].push_back(hist_tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
589 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
590 token = strtok(NULL,"\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
591 if (token != NULL) {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
592 double hist_tmp;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
593 sscanf(token,"%lf",&hist_tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
594 IRM["AA"].push_back(hist_tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
595 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
596 /* token = strtok(NULL,"\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
597 if (token != NULL) {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
598 double hist_tmp;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
599 sscanf(token,"%lf",&hist_tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
600 IRM["AI"].push_back(hist_tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
601 }*/
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
602 token = strtok(NULL,"\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
603 if (token != NULL) {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
604 double hist_tmp;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
605 sscanf(token,"%lf",&hist_tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
606 IRM["IR"].push_back(hist_tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
607 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
608 token = strtok(NULL,"\t");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
609 if (token != NULL) {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
610 double hist_tmp;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
611 sscanf(token,"%lf",&hist_tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
612 IRM["CS"].push_back(hist_tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
613 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
614
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
615 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
616 IRM_file.close();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
617 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
618
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
619 //read FZM
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
620 cout<<"Reading fragment size distn. file..."<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
621 ifstream FZM_file(FZM_file_name);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
622 vector<double> FZM;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
623 char FZM_file_line[MAX_LINE_LEN];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
624 FZM_file.getline(FZM_file_line,MAX_LINE_LEN);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
625 while(FZM_file.getline (FZM_file_line,MAX_LINE_LEN) )
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
626 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
627 double hist_tmp;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
628 sscanf(FZM_file_line, "%lf", &hist_tmp) ;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
629 FZM.push_back(hist_tmp);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
630 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
631 FZM_file.close();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
632 //start to read reads
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
633 cout<<"Reading reads from both ends...."<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
634 ifstream read1_nomt_file(read1_nomt_file_name);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
635 ifstream read2_nomt_file(read2_nomt_file_name);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
636 char read1_line[MAX_LINE_LEN];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
637 char read2_line[MAX_LINE_LEN];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
638 int total_single_read_num=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
639 int total_pair_read_num=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
640 int total_read_num=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
641 while(read1_nomt_file.getline(read1_line, MAX_LINE_LEN))
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
642 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
643 total_read_num++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
644 ReadInfo read1(read1_line);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
645 map<string, vector<int> > read1_mappos;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
646 if(read1.isUseful)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
647 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
648 read1.scan(read1_mappos);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
649 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
650 read2_nomt_file.getline(read2_line, MAX_LINE_LEN);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
651 ReadInfo read2(read2_line);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
652 map<string, vector<int> > read2_mappos;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
653 if(read2.isUseful)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
654 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
655 read2.scan(read2_mappos);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
656 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
657 //pairing
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
658 ReadPair read_pair_ins (read1_mappos, read2_mappos, read_size);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
659 //feed them into TXdb_entries
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
660 map<string , ReadPairInfo>::iterator it = read_pair_ins.mappinginfo.begin();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
661 for(; it!=read_pair_ins.mappinginfo.end();it++)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
662 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
663 string eventid = it->first;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
664 map<string, TXdb_entry>::iterator txdb_it = TXdb_entries.find(eventid);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
665 if(txdb_it == TXdb_entries.end() )
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
666 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
667 continue;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
668 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
669 //get numbers
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
670 for(int i=0; i<4;i++)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
671 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
672 if(read_pair_ins.mappinginfo[eventid].Pos[i]!=-1)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
673 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
674 int pos_tmp1 = read_pair_ins.mappinginfo[eventid].Pos[i];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
675 int pos_tmp2;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
676 if(i<2)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
677 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
678 if (txdb_it->second.strand == '-')
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
679 pos_tmp1 = txdb_it->second.len1 -pos_tmp1 +1- read_size;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
680 pos_tmp2 = pos_tmp1 + read_size;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
681 if (pos_tmp2 <= txdb_it->second.exonsize[0])
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
682 { txdb_it->second.enum1++;continue;}
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
683 if (pos_tmp1 <= txdb_it->second.exonsize[0] )
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
684 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
685 if(read_pair_ins.mappinginfo[eventid].IsoFlag[1] !=1)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
686 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
687 txdb_it->second.jnum12++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
688 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
689 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
690 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
691 txdb_it->second.enum1++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
692 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
693 continue;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
694 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
695 if (pos_tmp2 <= txdb_it->second.exonsize[0]+txdb_it->second.exonsize[1])
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
696 { txdb_it->second.enum2++;continue;}
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
697 if (pos_tmp1 <= txdb_it->second.exonsize[0]+txdb_it->second.exonsize[1])
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
698 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
699 if(read_pair_ins.mappinginfo[eventid].IsoFlag[1] !=1)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
700 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
701 txdb_it->second.jnum23++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
702 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
703 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
704 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
705 txdb_it->second.enum2++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
706 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
707 continue;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
708
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
709 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
710 txdb_it->second.enum3++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
711 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
712 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
713 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
714 if (read_pair_ins.mappinginfo[eventid].IsoFlag[0] == 1
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
715 && read_pair_ins.mappinginfo[eventid].IsoFlag[1] == 1)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
716 continue;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
717 if (txdb_it->second.strand == '-')
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
718 pos_tmp1 = txdb_it->second.len2 -pos_tmp1 +1- read_size;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
719 pos_tmp2 = pos_tmp1 + read_size;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
720 if (pos_tmp2 <= txdb_it->second.exonsize[0])
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
721 { txdb_it->second.enum1++;continue;}
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
722 if (pos_tmp1 <= txdb_it->second.exonsize[0] )
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
723 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
724 if(read_pair_ins.mappinginfo[eventid].IsoFlag[0] !=1)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
725 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
726 txdb_it->second.jnum13++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
727 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
728 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
729 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
730 txdb_it->second.enum1++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
731 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
732 continue;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
733 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
734 txdb_it->second.enum3++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
735
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
736
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
737 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
738 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
739 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
740
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
741 if(read_pair_ins.mappinginfo[eventid].isPaired)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
742 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
743 total_pair_read_num += 2;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
744 txdb_it->second.totalreadnum += 2;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
745 (txdb_it->second).pairs.push_back(read_pair_ins.mappinginfo[eventid]);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
746 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
747 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
748 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
749 total_single_read_num += 2;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
750 txdb_it->second.totalreadnum += 1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
751 if(read_pair_ins.mappinginfo[eventid].IsoFlag[0] == 1)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
752 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
753 if(read_pair_ins.mappinginfo[eventid].IsoFlag[1] == 0)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
754 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
755 (txdb_it->second).snum1++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
756 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
757 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
758 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
759 (txdb_it->second).snum12++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
760 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
761 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
762 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
763 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
764 (txdb_it->second).snum2++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
765 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
766 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
767 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
768
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
769
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
770 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
771
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
772 read1_nomt_file.close();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
773 read2_nomt_file.close();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
774 cout<<"Total read number:\t"<<total_read_num*2<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
775 cout<<"Singled reads used:\t"<<total_single_read_num<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
776 cout<<"Paired reads used:\t"<<total_pair_read_num<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
777
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
778 log_file<<"#total_read_num:\t"<<total_read_num*2<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
779 log_file<<"#total_single_read_num:\t"<<total_single_read_num<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
780 log_file<<"#total_pair_read_num:\t"<<total_pair_read_num<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
781
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
782 //Calculate ratios and output
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
783 cout<<"Now doing estimations and output to "<<ratio_file_name<<"\t"<<num_file_name<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
784 ofstream ratio_file(ratio_file_name.c_str());
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
785 ofstream num_file(num_file_name.c_str());
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
786 map<string, TXdb_entry>::iterator txdb_it = TXdb_entries.begin();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
787 for(;txdb_it!=TXdb_entries.end();txdb_it++)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
788 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
789 string eventid = txdb_it->first;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
790 //cout<< eventid <<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
791 txdb_it->second.estimate(FZM,IRM,read_size);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
792 /* ratio_file<< eventid<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
793 <<setprecision(4)<<fixed
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
794 <<txdb_it->second.ir<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
795 <<txdb_it->second.bir<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
796 <<"NA\tNA\tNA\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
797 <<txdb_it->second.jnum12<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
798 <<txdb_it->second.jnum23<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
799 <<txdb_it->second.jnum13<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
800 <<(1.0*read_size*( txdb_it->second.enum1+txdb_it->second.jnum12 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[0] )<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
801 <<(1.0*read_size*( txdb_it->second.enum2+txdb_it->second.jnum12 +txdb_it->second.jnum23 )/txdb_it->second.exonsize[1] )<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
802 <<(1.0*read_size*( txdb_it->second.enum3+txdb_it->second.jnum23 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[2] )<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
803 <<txdb_it->second.exonsize[0]<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
804 <<txdb_it->second.exonsize[1]<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
805 <<txdb_it->second.exonsize[2]<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
806 <<(1.0*read_size*txdb_it->second.totalreadnum/txdb_it->second.len1)<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
807 */
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
808 ratio_file<<eventid<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
809 <<setprecision(4)<<fixed
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
810 <<txdb_it->second.ir<<"\t" //1
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
811 <<txdb_it->second.bir<<"\t" //2
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
812 <<txdb_it->second.chrid<<"\t" //3
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
813 <<txdb_it->second.start<<"," //
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
814 <<txdb_it->second.start+txdb_it->second.exonsize[0]<<"\t" //4
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
815 <<txdb_it->second.start+txdb_it->second.exonstarts[1]<<","
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
816 <<txdb_it->second.start+txdb_it->second.exonstarts[1]+txdb_it->second.exonsize[1]<<"\t"//5
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
817 <<txdb_it->second.start+txdb_it->second.exonstarts[2]<<","
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
818 <<txdb_it->second.start+txdb_it->second.exonstarts[2]+txdb_it->second.exonsize[2]<<"\t"//6
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
819 <<txdb_it->second.strand<<"\t"//7
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
820 <<txdb_it->second.jnum12<<"\t"//8
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
821 <<txdb_it->second.jnum23<<"\t"//9
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
822 <<txdb_it->second.jnum13<<"\t"//10
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
823 <<(1.0*read_size*( txdb_it->second.enum1+txdb_it->second.jnum12 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[0] )<<"\t"//11
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
824 <<(1.0*read_size*( txdb_it->second.enum2+txdb_it->second.jnum12 +txdb_it->second.jnum23 )/txdb_it->second.exonsize[1] )<<"\t"//12
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
825 <<(1.0*read_size*( txdb_it->second.enum3+txdb_it->second.jnum23 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[2] )<<"\t"//13`
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
826 <<(1.0*read_size*txdb_it->second.totalreadnum/txdb_it->second.len1)<<"\t"//14
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
827 <<txdb_it->second.exonsize[0]<<"\t"//15
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
828 <<txdb_it->second.exonsize[1]<<"\t"//16
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
829 <<txdb_it->second.exonsize[2]<<endl;//17
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
830
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
831 num_file<< eventid<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
832 <<txdb_it->second.chrid<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
833 <<txdb_it->second.start<<","
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
834 <<txdb_it->second.start+txdb_it->second.exonsize[0]<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
835 <<txdb_it->second.start+txdb_it->second.exonstarts[1]<<","
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
836 <<txdb_it->second.start+txdb_it->second.exonstarts[1]+txdb_it->second.exonsize[1]<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
837 <<txdb_it->second.start+txdb_it->second.exonstarts[2]<<","
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
838 <<txdb_it->second.start+txdb_it->second.exonstarts[2]+txdb_it->second.exonsize[2]<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
839 <<txdb_it->second.strand<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
840 <<txdb_it->second.enum1<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
841 <<txdb_it->second.enum2<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
842 <<txdb_it->second.enum3<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
843 <<txdb_it->second.jnum12<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
844 <<txdb_it->second.jnum23<<"\t"
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
845 <<txdb_it->second.jnum13<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
846 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
847 log_file.close();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
848 ratio_file.close();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
849 num_file.close();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
850 clock_finish = clock();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
851 cout<<"Done! time used:"<<(double)(clock_finish-clock_start)/CLOCKS_PER_SEC<<" seconds"<<endl;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
852
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
853
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
854 }