comparison src/splicetrap.estimate.cpp @ 1:adc0f7765d85 draft

planemo upload
author bioitcore
date Thu, 07 Sep 2017 15:06:58 -0400
parents
children
comparison
equal deleted inserted replaced
0:d4ca551ca300 1:adc0f7765d85
1 //Author: Jie Wu@CSHL
2 //TO replace the original Perl code
3
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <iostream>
7 #include <fstream>
8 #include <string.h>
9 #include <unistd.h>
10 #include <sstream>
11 #include <map>
12 #include <vector>
13 #include <math.h>
14 #include <time.h>
15 #include <limits>
16 #include <iomanip>
17
18 using namespace std;
19
20 int MAX_LINE_LEN = 1024*1024;
21
22 void printusage()
23 {
24 cout<< "\tUsage:"<<endl;
25 cout<<"\t-s\tread_size"<<endl;
26 cout<<"\t-b\tIRM file"<<endl;
27 cout<<"\t-f\tFZM file"<<endl;
28 cout<<"\t-1\tmapping result 1"<<endl;
29 cout<<"\t-2\tmapping result 2"<<endl;
30 cout<<"\t-d\tTXdb database file"<<endl;
31 cout<<"\t-o\toutput prefix"<<endl;
32
33 }
34
35 struct ReadPairInfo
36 {
37 bool isPaired;
38 vector<int> IsoFlag; //flag as 1 if mapped to the isoform
39 vector<int> Size; //fragment size in the isoform
40 vector<int> Pos;
41 };
42
43 class TXdb_entry{
44 public:
45 string chrid;
46 //int start;
47 //int end;
48 string id;
49 bool iscomplete;
50 int start;
51 int end;
52 int exonstarts[3];
53 char strand;
54 int exonsize[3];
55 int len1;
56 int len2;
57 vector< ReadPairInfo > pairs;
58 int snum1; //single-end reads
59 int snum2;
60 int snum12;
61 float ir, bir;
62 int enum1; //exon body reads
63 int enum2;
64 int enum3;
65
66 int jnum12;
67 int jnum23;
68 int jnum13;
69
70 int totalreadnum;
71 void estimate(vector<double> &FZM, map<string, vector<double> > &IRM, int &read_size)
72 {
73 float Maxe=0;
74 float Max = -numeric_limits<float>::max();
75 float BMaxe=0;
76 float BMax=-numeric_limits<float>::max();
77 for(float e1=0.001; e1<1;e1=e1+0.001)
78 {
79 float efix1 = e1*len1/
80 (e1*exonsize[1]+len2);
81 float efix2 = 1-efix1;
82 float LL = snum12*log( efix1/(len1-read_size+1)
83 + efix2/(len2-read_size+1) )
84 + snum1*log(efix1/(len1-read_size+1))
85 + snum2*log(efix2/(len2-read_size+1));
86 //cout<<LL<<endl;
87 //cout<<efix1<<"\t" <<pairs.size()<<"\t" <<len2<<"\t" <<endl;
88 //int num1=0, num2=0,num12=0;
89 for(int i=0;i<pairs.size();i++)
90 {
91 if(pairs[i].IsoFlag[0] == 1)
92 {
93 if(pairs[i].IsoFlag[1] == 0)
94 {
95 // num1++;
96 float tmp=FZM[pairs[i].Size[0]]*efix1/(len1-pairs[i].Size[0]+1);
97 if (tmp == 0)
98 {
99 LL = LL - 308;
100 }
101 else
102 {
103 LL = LL + log(tmp);
104 }
105 }
106 else
107 {
108 // num12++;
109 float tmp=(FZM[pairs[i].Size[0]]*efix1/(len1-pairs[i].Size[0]+1))
110 + (FZM[pairs[i].Size[1]]*efix2/(len2-pairs[i].Size[1]+1));
111 if(tmp ==0)
112 {
113 LL = LL -308;
114 }
115 else
116 {
117 LL = LL + log(tmp);
118 }
119 }
120 }
121 else
122 {
123 //num2++;
124 float tmp = FZM[pairs[i].Size[1]]*efix2/(len2-pairs[i].Size[1]+1);
125 if(tmp ==0)
126 {
127 LL= LL - 308;
128 }
129 else
130 {
131 LL = LL +log(tmp);
132 }
133 }
134 }
135 //cout<<num1<<"\t"<<num2<<"\t"<<num12<<"\n";
136 //cout<<LL<<endl;
137
138 if(IRM["CA"].size() > 0)
139 {
140 float BLL;
141 string eventtype =id.substr(0,2);
142 if(!eventtype.compare("ME"))
143 {
144 eventtype = "CA";
145 }
146 BLL = LL + log(IRM[eventtype][int(e1/0.001)]);
147 if(BLL > BMax)
148 {
149 BMax = BLL;
150 BMaxe = e1;
151 }
152 }
153 //cout<<LL<<endl;
154 if(LL > Max)
155 {
156 Max= LL;
157 Maxe = e1;
158
159 }
160
161 }
162 ir= Maxe;
163 bir= BMaxe;
164 }
165
166 TXdb_entry(char *line)
167 {
168 size_t found;
169 string linestr(line);
170 found = linestr.find("[L]");
171 if(found != string::npos)
172 {
173 iscomplete=1;
174
175 char *token;
176
177 token = strtok(line,"\t");
178 if(token==NULL)
179 {
180 cout<<"error1"<<endl;
181 exit(0);
182 }
183 else
184 {
185 chrid = token;
186 }
187 token = strtok(NULL,"\t");
188 if(token==NULL)
189 {
190 cout<<"error2"<<endl;
191 exit(0);
192 }
193 else
194 {
195 sscanf(token,"%d",&start);
196 }
197 token = strtok(NULL,"\t");
198 if(token==NULL)
199 {
200 cout<<"error3"<<endl;
201 exit(0);
202 }
203 else
204 {
205 sscanf(token,"%d",&end);
206 }
207 token = strtok(NULL,"\t");
208 if(token==NULL)
209 {
210 cout<<"error4"<<endl;exit(0);
211 }
212 else
213 {
214 id = token;
215 id = id.substr(0,id.size()-3);
216 }
217 token = strtok(NULL,"\t");
218 token = strtok(NULL,"\t");
219 if(token==NULL)
220 {
221 cout<<"error5"<<endl;exit(0);
222 }
223 else
224 {
225 sscanf(token,"%c",&strand);
226 }
227 for(int i=0;i<5;i++)
228 {
229 token=strtok(NULL,"\t");
230 }
231 if(token==NULL){
232 cout<<"error6"<<endl;exit(0);
233 }
234 else
235 {
236 string str_tmp(token);
237 size_t found_tmp;
238 int *pexonsize = exonsize;
239 while((found_tmp = str_tmp.find(",")) != string::npos){
240 sscanf((str_tmp.substr(0,found_tmp)).c_str(),"%d",pexonsize);
241 pexonsize++;
242 str_tmp.erase(0,found_tmp+1);
243 }
244 sscanf(str_tmp.c_str(),"%d",pexonsize);
245
246 }
247 token=strtok(NULL,"\t");
248
249 if(token==NULL){
250 cout<<"error7"<<endl;exit(0);
251 }
252 else
253 {
254 string str_tmp(token);
255 size_t found_tmp;
256 int *pexonstarts = exonstarts;
257 while((found_tmp = str_tmp.find(",")) != string::npos){
258 sscanf((str_tmp.substr(0,found_tmp)).c_str(),"%d",pexonstarts);
259 pexonstarts++;
260 str_tmp.erase(0,found_tmp+1);
261 }
262 sscanf(str_tmp.c_str(),"%d",pexonstarts);
263
264 }
265 len1 = exonsize[0]+exonsize[1]+exonsize[2];
266 len2 = exonsize[0]+exonsize[2];
267 snum12=0;
268 snum1 =0;
269 snum2 =0;
270 enum1=0;
271 enum2=0;
272 enum3=0;
273 jnum12=0;
274 jnum13=0;
275 jnum23=0;
276 totalreadnum=0;
277 }
278 else
279 {
280 iscomplete = 0;
281 }
282 }
283 };
284
285 class ReadInfo
286 {
287 public:
288 bool isUseful;
289 char *pline;
290 void scan(map<string, vector<int> > &map_pos )
291 //eventname, pos in L/S iso
292 {
293 char *token;
294 token = strtok(pline, "\t");
295 token = strtok(NULL, "\t");
296 token = strtok(NULL, "\t");
297 token = strtok(NULL, "\t");
298 string mapinfo(token);
299 size_t beg, end;
300 end = 0;
301 while( (beg= mapinfo.find_first_of("/", end))!=string::npos)
302 {
303 beg ++;
304 end = mapinfo.find_first_of("[",beg);
305 string eventid = mapinfo.substr( beg, end-beg ) ;
306 vector<int> pos(2,-1);
307 if(!map_pos.count(eventid))
308 {
309 map_pos.insert(pair<string, vector<int> >
310 (eventid, pos) );
311 }
312 end ++;
313 string LorS = mapinfo.substr( end,1 );
314 beg = mapinfo.find_first_of(":",end );
315 beg ++;
316 end = mapinfo.find_first_of(",",beg );
317 string pos_str(mapinfo.substr(beg, end-2-beg));
318
319 if(LorS.compare("L") == 0)
320 {
321 sscanf(pos_str.c_str(),"%d",&(map_pos[eventid][0]) );
322 }
323 else
324 {
325 sscanf(pos_str.c_str(),"%d",&(map_pos[eventid][1]) );
326 }
327
328 //cout<<eventid<<"\t"<<map_pos[eventid][0]<<"\t"<<map_pos[eventid][1]<<endl;
329 }
330
331 }
332 ReadInfo(char *line)
333 {
334 string line_str(line);
335 string line_str_last2=line_str.substr(line_str.size()-2,2);
336 if(line_str_last2.compare("NM") == 0 )
337 {
338 isUseful = 0;
339 }
340 else if(line_str_last2.compare("MT") == 0 )
341 {
342 isUseful = 0;
343 }
344 else
345 {
346 isUseful = 1;
347 pline = line;
348 }
349 }
350 };
351
352
353
354 class ReadPair
355 {
356 public:
357 map<string, ReadPairInfo> mappinginfo;
358 ReadPair(map<string, vector<int> > &read1_mappos, map<string, vector<int> > &read2_mappos, int &read_size)
359 {
360 map<string, vector<int> >::iterator it1 = read1_mappos.begin();
361 for(;it1!=read1_mappos.end();it1++)
362 {
363 ReadPairInfo ReadPairInfo_tmp;
364 ReadPairInfo_tmp.IsoFlag.resize(2);
365 ReadPairInfo_tmp.Size.resize(2);
366 //ReadPairInfo_tmp.Pos.resize(4);
367 ReadPairInfo_tmp.Pos.assign(4,-1);
368 string eventid = it1->first;
369 if(read2_mappos.count(eventid) > 0)
370 {
371 ReadPairInfo_tmp.isPaired = 0;
372 for(int i=0;i<2;i++)
373 {
374 //cout<<read1_mappos[eventid][i]<<endl;
375 if(read2_mappos[eventid][i] == -1 || read1_mappos[eventid][i] == -1)
376 {
377 ReadPairInfo_tmp.IsoFlag[i] = 0;
378 ReadPairInfo_tmp.Size[i] = 0;
379 //discard reads like this
380 }
381 else
382 {
383
384 ReadPairInfo_tmp.isPaired = 1;
385 ReadPairInfo_tmp.IsoFlag[i] = 1;
386 ReadPairInfo_tmp.Size[i] = abs(read2_mappos[eventid][i]
387 -read1_mappos[eventid][i])
388 +read_size;
389 ReadPairInfo_tmp.Pos[2*i] = read1_mappos[eventid][i];
390 ReadPairInfo_tmp.Pos[2*i+1] = read2_mappos[eventid][i];
391 }
392 }
393 mappinginfo.insert (pair<string, ReadPairInfo>(eventid, ReadPairInfo_tmp));
394 }
395 else
396 {
397 ReadPairInfo_tmp.isPaired = 0;
398 for(int i=0;i<2;i++)
399 {
400 ReadPairInfo_tmp.Pos[2*i] = read1_mappos[eventid][i];
401 ReadPairInfo_tmp.Size[i] = 0;
402 if(read1_mappos[eventid][i] == -1)
403 {
404 ReadPairInfo_tmp.IsoFlag[i] = 0;
405
406 }
407 else
408 {
409 ReadPairInfo_tmp.IsoFlag[i] = 1;
410 }
411 }
412 mappinginfo.insert (pair<string, ReadPairInfo>(eventid, ReadPairInfo_tmp));
413 }
414 }
415 map<string, vector<int> >::iterator it2 = read2_mappos.begin();
416 for(;it2!=read2_mappos.end();it2++)
417 {
418 string eventid = it2->first;
419 if(mappinginfo.count(eventid)>0)
420 {
421 continue;
422 }
423 ReadPairInfo ReadPairInfo_tmp;
424 ReadPairInfo_tmp.IsoFlag.resize(2);
425 ReadPairInfo_tmp.Size.resize(2);
426 //ReadPairInfo_tmp.Pos.resize(4);
427 ReadPairInfo_tmp.Pos.assign(4,-1);
428 if(read1_mappos.count(eventid) > 0)
429 {
430 ReadPairInfo_tmp.isPaired = 0;
431 for(int i=0;i<2;i++)
432 {
433 if(read2_mappos[eventid][i] == -1 || read1_mappos[eventid][i] == -1)
434 {
435 ReadPairInfo_tmp.IsoFlag[i] = 0;
436 ReadPairInfo_tmp.Size[i] = 0;
437 }
438 else
439 {
440 ReadPairInfo_tmp.isPaired = 1;
441 ReadPairInfo_tmp.IsoFlag[i] = 1;
442 ReadPairInfo_tmp.Size[i] = abs(read2_mappos[eventid][i]
443 -read1_mappos[eventid][i]
444 +read_size);
445 ReadPairInfo_tmp.Pos[2*i] = read1_mappos[eventid][i];
446 ReadPairInfo_tmp.Pos[2*i+1]=read2_mappos[eventid][i];
447 }
448 }
449 mappinginfo.insert (pair<string, ReadPairInfo>(eventid, ReadPairInfo_tmp));
450 }
451 else
452 {
453 ReadPairInfo_tmp.isPaired = 0;
454 for(int i=0;i<2;i++)
455 {
456 ReadPairInfo_tmp.Pos[2*i+1]=read2_mappos[eventid][i];
457 ReadPairInfo_tmp.Size[i] = 0;
458 if(read2_mappos[eventid][i] == -1)
459 {
460 ReadPairInfo_tmp.IsoFlag[i] = 0;
461 }
462 else
463 {
464 ReadPairInfo_tmp.IsoFlag[i] = 1;
465 }
466 }
467 mappinginfo.insert (pair<string, ReadPairInfo>(eventid, ReadPairInfo_tmp));
468 }
469
470 }
471 }
472 };
473
474 int main (int argc, char **argv) {
475 int next_option;
476 int digit_optind = 0;
477 int read_size = 36;
478 char *dbfile_name = 0,
479 *IRM_file_name = 0,
480 *FZM_file_name = 0,
481 *output_file_prefix = 0,
482 *read1_nomt_file_name = 0,
483 *read2_nomt_file_name = 0;
484
485 while ( (next_option = getopt(argc, argv, "s:d:b:f:o:1:2:")) != -1) {
486 int this_option_optind = optind ? optind : 1;
487 switch (next_option) {
488 case 's': sscanf (optarg,"%d",&read_size); break;
489 case 'd': dbfile_name = optarg; break;
490 case 'b': IRM_file_name = optarg; break;
491 case 'f': FZM_file_name = optarg; break;
492 case 'o': output_file_prefix = optarg; break;
493 case '1': read1_nomt_file_name = optarg; break;
494 case '2': read2_nomt_file_name = optarg; break;
495 default: break;
496 }
497 }
498 if (!read_size||
499 !dbfile_name||
500 !FZM_file_name||
501 !output_file_prefix||
502 !read1_nomt_file_name||
503 !read2_nomt_file_name)
504 {
505 printusage();
506 return 0;
507 }
508 clock_t clock_start, clock_finish;
509 clock_start=clock();
510 string ratio_file_name = output_file_prefix;
511 ratio_file_name.append(".ratio");
512 string log_file_name = output_file_prefix;
513 log_file_name.append(".log");
514 string num_file_name = output_file_prefix;
515 num_file_name.append(".nums");
516
517 ofstream log_file(log_file_name.c_str());
518 cout<<"Log file name:\t"<<log_file_name<<endl;
519 cout<<"Parameters:"<<endl;
520 cout<<"Read size:\t"<<read_size<<endl;
521 log_file<<"#read_size:\t"<<read_size<<endl;
522 cout<<"Database:\t"<<dbfile_name<<endl;
523 log_file<<"#dbfile_name:\t"<<dbfile_name<<endl;
524 cout<<"Fragment size file name:\t"<<FZM_file_name<<endl;
525 log_file<<"#FZM_file_name:\t"<<FZM_file_name<<endl;
526 cout<<"Output file prefix:\t"<<output_file_prefix<<endl;
527 cout<<"Read end1 file:\t"<<read1_nomt_file_name<<endl;
528 cout<<"Read end2 file:\t"<<read2_nomt_file_name<<endl;
529 log_file<<"#read1_nomt_file_name:\t"<<read1_nomt_file_name<<endl;
530 log_file<<"#read2_nomt_file_name:\t"<<read2_nomt_file_name<<endl;
531 if(IRM_file_name)
532 {
533 cout<<"Inclusion ratio distn. file:\t"<<IRM_file_name<<endl;
534 log_file<<"#IRM_file_name:\t"<<IRM_file_name<<endl;
535
536 }
537 else
538 {
539 cout<<"Inclusion ratio distn. file:\tNA"<<endl;
540 log_file<<"#IRM_file_name:\tNA"<<endl;
541 }
542
543 //start reading files, dbfile first
544 cout<<"Reading database file..."<<endl;
545 map<string, TXdb_entry> TXdb_entries;
546 ifstream dbfile(dbfile_name);
547 char dbfile_line[MAX_LINE_LEN];
548 while(dbfile.getline(dbfile_line,MAX_LINE_LEN)){
549 TXdb_entry entry_tmp(dbfile_line);
550 if(entry_tmp.iscomplete)
551 {
552 TXdb_entries.insert(pair<string, TXdb_entry>
553 (entry_tmp.id,entry_tmp)
554 );
555 //cout<<entry_tmp.exonsize[1]<<endl;
556 }
557
558 }
559 dbfile.close();
560
561 //start reading bayes prior if there is
562 map<string, vector< double > > IRM;
563 IRM.insert(pair<string, vector<double> > ("CA", vector<double>()));
564 IRM.insert(pair<string, vector<double> > ("CS", vector<double>()));
565 IRM.insert(pair<string, vector<double> > ("IR", vector<double>()));
566 IRM.insert(pair<string, vector<double> > ("AA", vector<double>()));
567 IRM.insert(pair<string, vector<double> > ("AD", vector<double>()));
568 //IRM.insert(pair<string, vector<double> > ("AI", vector<double>()));
569 if(IRM_file_name)
570 {
571 cout<<"Reading inclusion ratio distn. file..."<<endl;
572 ifstream IRM_file(IRM_file_name);
573 char IRM_line[MAX_LINE_LEN];
574 IRM_file.getline(IRM_line, MAX_LINE_LEN);
575 while(IRM_file.getline(IRM_line, MAX_LINE_LEN))
576 {
577 char *token;
578 token = strtok(IRM_line, "\t");
579 if (token != NULL) {
580 double hist_tmp;
581 sscanf(token,"%lf",&hist_tmp);
582 IRM["CA"].push_back(hist_tmp);
583 }
584 token = strtok(NULL,"\t");
585 if (token != NULL) {
586 double hist_tmp;
587 sscanf(token,"%lf",&hist_tmp);
588 IRM["AD"].push_back(hist_tmp);
589 }
590 token = strtok(NULL,"\t");
591 if (token != NULL) {
592 double hist_tmp;
593 sscanf(token,"%lf",&hist_tmp);
594 IRM["AA"].push_back(hist_tmp);
595 }
596 /* token = strtok(NULL,"\t");
597 if (token != NULL) {
598 double hist_tmp;
599 sscanf(token,"%lf",&hist_tmp);
600 IRM["AI"].push_back(hist_tmp);
601 }*/
602 token = strtok(NULL,"\t");
603 if (token != NULL) {
604 double hist_tmp;
605 sscanf(token,"%lf",&hist_tmp);
606 IRM["IR"].push_back(hist_tmp);
607 }
608 token = strtok(NULL,"\t");
609 if (token != NULL) {
610 double hist_tmp;
611 sscanf(token,"%lf",&hist_tmp);
612 IRM["CS"].push_back(hist_tmp);
613 }
614
615 }
616 IRM_file.close();
617 }
618
619 //read FZM
620 cout<<"Reading fragment size distn. file..."<<endl;
621 ifstream FZM_file(FZM_file_name);
622 vector<double> FZM;
623 char FZM_file_line[MAX_LINE_LEN];
624 FZM_file.getline(FZM_file_line,MAX_LINE_LEN);
625 while(FZM_file.getline (FZM_file_line,MAX_LINE_LEN) )
626 {
627 double hist_tmp;
628 sscanf(FZM_file_line, "%lf", &hist_tmp) ;
629 FZM.push_back(hist_tmp);
630 }
631 FZM_file.close();
632 //start to read reads
633 cout<<"Reading reads from both ends...."<<endl;
634 ifstream read1_nomt_file(read1_nomt_file_name);
635 ifstream read2_nomt_file(read2_nomt_file_name);
636 char read1_line[MAX_LINE_LEN];
637 char read2_line[MAX_LINE_LEN];
638 int total_single_read_num=0;
639 int total_pair_read_num=0;
640 int total_read_num=0;
641 while(read1_nomt_file.getline(read1_line, MAX_LINE_LEN))
642 {
643 total_read_num++;
644 ReadInfo read1(read1_line);
645 map<string, vector<int> > read1_mappos;
646 if(read1.isUseful)
647 {
648 read1.scan(read1_mappos);
649 }
650 read2_nomt_file.getline(read2_line, MAX_LINE_LEN);
651 ReadInfo read2(read2_line);
652 map<string, vector<int> > read2_mappos;
653 if(read2.isUseful)
654 {
655 read2.scan(read2_mappos);
656 }
657 //pairing
658 ReadPair read_pair_ins (read1_mappos, read2_mappos, read_size);
659 //feed them into TXdb_entries
660 map<string , ReadPairInfo>::iterator it = read_pair_ins.mappinginfo.begin();
661 for(; it!=read_pair_ins.mappinginfo.end();it++)
662 {
663 string eventid = it->first;
664 map<string, TXdb_entry>::iterator txdb_it = TXdb_entries.find(eventid);
665 if(txdb_it == TXdb_entries.end() )
666 {
667 continue;
668 }
669 //get numbers
670 for(int i=0; i<4;i++)
671 {
672 if(read_pair_ins.mappinginfo[eventid].Pos[i]!=-1)
673 {
674 int pos_tmp1 = read_pair_ins.mappinginfo[eventid].Pos[i];
675 int pos_tmp2;
676 if(i<2)
677 {
678 if (txdb_it->second.strand == '-')
679 pos_tmp1 = txdb_it->second.len1 -pos_tmp1 +1- read_size;
680 pos_tmp2 = pos_tmp1 + read_size;
681 if (pos_tmp2 <= txdb_it->second.exonsize[0])
682 { txdb_it->second.enum1++;continue;}
683 if (pos_tmp1 <= txdb_it->second.exonsize[0] )
684 {
685 if(read_pair_ins.mappinginfo[eventid].IsoFlag[1] !=1)
686 {
687 txdb_it->second.jnum12++;
688 }
689 else
690 {
691 txdb_it->second.enum1++;
692 }
693 continue;
694 }
695 if (pos_tmp2 <= txdb_it->second.exonsize[0]+txdb_it->second.exonsize[1])
696 { txdb_it->second.enum2++;continue;}
697 if (pos_tmp1 <= txdb_it->second.exonsize[0]+txdb_it->second.exonsize[1])
698 {
699 if(read_pair_ins.mappinginfo[eventid].IsoFlag[1] !=1)
700 {
701 txdb_it->second.jnum23++;
702 }
703 else
704 {
705 txdb_it->second.enum2++;
706 }
707 continue;
708
709 }
710 txdb_it->second.enum3++;
711 }
712 else
713 {
714 if (read_pair_ins.mappinginfo[eventid].IsoFlag[0] == 1
715 && read_pair_ins.mappinginfo[eventid].IsoFlag[1] == 1)
716 continue;
717 if (txdb_it->second.strand == '-')
718 pos_tmp1 = txdb_it->second.len2 -pos_tmp1 +1- read_size;
719 pos_tmp2 = pos_tmp1 + read_size;
720 if (pos_tmp2 <= txdb_it->second.exonsize[0])
721 { txdb_it->second.enum1++;continue;}
722 if (pos_tmp1 <= txdb_it->second.exonsize[0] )
723 {
724 if(read_pair_ins.mappinginfo[eventid].IsoFlag[0] !=1)
725 {
726 txdb_it->second.jnum13++;
727 }
728 else
729 {
730 txdb_it->second.enum1++;
731 }
732 continue;
733 }
734 txdb_it->second.enum3++;
735
736
737 }
738 }
739 }
740
741 if(read_pair_ins.mappinginfo[eventid].isPaired)
742 {
743 total_pair_read_num += 2;
744 txdb_it->second.totalreadnum += 2;
745 (txdb_it->second).pairs.push_back(read_pair_ins.mappinginfo[eventid]);
746 }
747 else
748 {
749 total_single_read_num += 2;
750 txdb_it->second.totalreadnum += 1;
751 if(read_pair_ins.mappinginfo[eventid].IsoFlag[0] == 1)
752 {
753 if(read_pair_ins.mappinginfo[eventid].IsoFlag[1] == 0)
754 {
755 (txdb_it->second).snum1++;
756 }
757 else
758 {
759 (txdb_it->second).snum12++;
760 }
761 }
762 else
763 {
764 (txdb_it->second).snum2++;
765 }
766 }
767 }
768
769
770 }
771
772 read1_nomt_file.close();
773 read2_nomt_file.close();
774 cout<<"Total read number:\t"<<total_read_num*2<<endl;
775 cout<<"Singled reads used:\t"<<total_single_read_num<<endl;
776 cout<<"Paired reads used:\t"<<total_pair_read_num<<endl;
777
778 log_file<<"#total_read_num:\t"<<total_read_num*2<<endl;
779 log_file<<"#total_single_read_num:\t"<<total_single_read_num<<endl;
780 log_file<<"#total_pair_read_num:\t"<<total_pair_read_num<<endl;
781
782 //Calculate ratios and output
783 cout<<"Now doing estimations and output to "<<ratio_file_name<<"\t"<<num_file_name<<endl;
784 ofstream ratio_file(ratio_file_name.c_str());
785 ofstream num_file(num_file_name.c_str());
786 map<string, TXdb_entry>::iterator txdb_it = TXdb_entries.begin();
787 for(;txdb_it!=TXdb_entries.end();txdb_it++)
788 {
789 string eventid = txdb_it->first;
790 //cout<< eventid <<endl;
791 txdb_it->second.estimate(FZM,IRM,read_size);
792 /* ratio_file<< eventid<<"\t"
793 <<setprecision(4)<<fixed
794 <<txdb_it->second.ir<<"\t"
795 <<txdb_it->second.bir<<"\t"
796 <<"NA\tNA\tNA\t"
797 <<txdb_it->second.jnum12<<"\t"
798 <<txdb_it->second.jnum23<<"\t"
799 <<txdb_it->second.jnum13<<"\t"
800 <<(1.0*read_size*( txdb_it->second.enum1+txdb_it->second.jnum12 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[0] )<<"\t"
801 <<(1.0*read_size*( txdb_it->second.enum2+txdb_it->second.jnum12 +txdb_it->second.jnum23 )/txdb_it->second.exonsize[1] )<<"\t"
802 <<(1.0*read_size*( txdb_it->second.enum3+txdb_it->second.jnum23 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[2] )<<"\t"
803 <<txdb_it->second.exonsize[0]<<"\t"
804 <<txdb_it->second.exonsize[1]<<"\t"
805 <<txdb_it->second.exonsize[2]<<"\t"
806 <<(1.0*read_size*txdb_it->second.totalreadnum/txdb_it->second.len1)<<endl;
807 */
808 ratio_file<<eventid<<"\t"
809 <<setprecision(4)<<fixed
810 <<txdb_it->second.ir<<"\t" //1
811 <<txdb_it->second.bir<<"\t" //2
812 <<txdb_it->second.chrid<<"\t" //3
813 <<txdb_it->second.start<<"," //
814 <<txdb_it->second.start+txdb_it->second.exonsize[0]<<"\t" //4
815 <<txdb_it->second.start+txdb_it->second.exonstarts[1]<<","
816 <<txdb_it->second.start+txdb_it->second.exonstarts[1]+txdb_it->second.exonsize[1]<<"\t"//5
817 <<txdb_it->second.start+txdb_it->second.exonstarts[2]<<","
818 <<txdb_it->second.start+txdb_it->second.exonstarts[2]+txdb_it->second.exonsize[2]<<"\t"//6
819 <<txdb_it->second.strand<<"\t"//7
820 <<txdb_it->second.jnum12<<"\t"//8
821 <<txdb_it->second.jnum23<<"\t"//9
822 <<txdb_it->second.jnum13<<"\t"//10
823 <<(1.0*read_size*( txdb_it->second.enum1+txdb_it->second.jnum12 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[0] )<<"\t"//11
824 <<(1.0*read_size*( txdb_it->second.enum2+txdb_it->second.jnum12 +txdb_it->second.jnum23 )/txdb_it->second.exonsize[1] )<<"\t"//12
825 <<(1.0*read_size*( txdb_it->second.enum3+txdb_it->second.jnum23 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[2] )<<"\t"//13`
826 <<(1.0*read_size*txdb_it->second.totalreadnum/txdb_it->second.len1)<<"\t"//14
827 <<txdb_it->second.exonsize[0]<<"\t"//15
828 <<txdb_it->second.exonsize[1]<<"\t"//16
829 <<txdb_it->second.exonsize[2]<<endl;//17
830
831 num_file<< eventid<<"\t"
832 <<txdb_it->second.chrid<<"\t"
833 <<txdb_it->second.start<<","
834 <<txdb_it->second.start+txdb_it->second.exonsize[0]<<"\t"
835 <<txdb_it->second.start+txdb_it->second.exonstarts[1]<<","
836 <<txdb_it->second.start+txdb_it->second.exonstarts[1]+txdb_it->second.exonsize[1]<<"\t"
837 <<txdb_it->second.start+txdb_it->second.exonstarts[2]<<","
838 <<txdb_it->second.start+txdb_it->second.exonstarts[2]+txdb_it->second.exonsize[2]<<"\t"
839 <<txdb_it->second.strand<<"\t"
840 <<txdb_it->second.enum1<<"\t"
841 <<txdb_it->second.enum2<<"\t"
842 <<txdb_it->second.enum3<<"\t"
843 <<txdb_it->second.jnum12<<"\t"
844 <<txdb_it->second.jnum23<<"\t"
845 <<txdb_it->second.jnum13<<endl;
846 }
847 log_file.close();
848 ratio_file.close();
849 num_file.close();
850 clock_finish = clock();
851 cout<<"Done! time used:"<<(double)(clock_finish-clock_start)/CLOCKS_PER_SEC<<" seconds"<<endl;
852
853
854 }