| 1 | 1 //Author: Jie Wu@CSHL | 
|  | 2 //TO replace the original Perl code | 
|  | 3 | 
|  | 4 #include <stdio.h> | 
|  | 5 #include <stdlib.h> | 
|  | 6 #include <iostream> | 
|  | 7 #include <fstream> | 
|  | 8 #include <string.h> | 
|  | 9 #include <unistd.h> | 
|  | 10 #include <sstream> | 
|  | 11 #include <map> | 
|  | 12 #include <vector> | 
|  | 13 #include <math.h> | 
|  | 14 #include <time.h> | 
|  | 15 #include <limits> | 
|  | 16 #include <iomanip> | 
|  | 17 | 
|  | 18 using namespace std; | 
|  | 19 | 
|  | 20 int MAX_LINE_LEN = 1024*1024; | 
|  | 21 | 
|  | 22 void printusage() | 
|  | 23 { | 
|  | 24     cout<< "\tUsage:"<<endl; | 
|  | 25     cout<<"\t-s\tread_size"<<endl; | 
|  | 26     cout<<"\t-b\tIRM file"<<endl; | 
|  | 27     cout<<"\t-f\tFZM file"<<endl; | 
|  | 28     cout<<"\t-1\tmapping result 1"<<endl; | 
|  | 29     cout<<"\t-2\tmapping result 2"<<endl; | 
|  | 30     cout<<"\t-d\tTXdb database file"<<endl; | 
|  | 31     cout<<"\t-o\toutput prefix"<<endl; | 
|  | 32 | 
|  | 33 } | 
|  | 34 | 
|  | 35 struct ReadPairInfo | 
|  | 36 { | 
|  | 37 	bool isPaired; | 
|  | 38 	vector<int> IsoFlag; //flag as 1 if mapped to the isoform | 
|  | 39 	vector<int> Size; //fragment size in the isoform | 
|  | 40 	vector<int> Pos; | 
|  | 41 }; | 
|  | 42 | 
|  | 43 class TXdb_entry{ | 
|  | 44 	public: | 
|  | 45 	 string chrid; | 
|  | 46 	 //int start; | 
|  | 47 	 //int end; | 
|  | 48 	 string id; | 
|  | 49 	 bool iscomplete; | 
|  | 50 	 int start; | 
|  | 51 	 int end; | 
|  | 52 	 int exonstarts[3]; | 
|  | 53 	 char strand; | 
|  | 54 	 int exonsize[3]; | 
|  | 55 	 int len1; | 
|  | 56 	 int len2; | 
|  | 57 	 vector< ReadPairInfo > pairs; | 
|  | 58 	 int snum1; //single-end reads | 
|  | 59 	 int snum2; | 
|  | 60 	 int snum12; | 
|  | 61 	 float ir, bir; | 
|  | 62 	 int enum1; //exon body reads | 
|  | 63 	 int enum2; | 
|  | 64 	 int enum3; | 
|  | 65 | 
|  | 66 	 int jnum12; | 
|  | 67 	 int jnum23; | 
|  | 68 	 int jnum13; | 
|  | 69 | 
|  | 70 	 int totalreadnum; | 
|  | 71 	 void estimate(vector<double> &FZM, map<string, vector<double> > &IRM, int &read_size) | 
|  | 72 	 { | 
|  | 73 		 float Maxe=0; | 
|  | 74 		 float Max = -numeric_limits<float>::max(); | 
|  | 75 		 float BMaxe=0; | 
|  | 76 		 float BMax=-numeric_limits<float>::max(); | 
|  | 77 		 for(float e1=0.001; e1<1;e1=e1+0.001) | 
|  | 78 		 { | 
|  | 79 			 float efix1 = e1*len1/ | 
|  | 80 			               (e1*exonsize[1]+len2); | 
|  | 81 			 float efix2 = 1-efix1; | 
|  | 82 			 float LL = snum12*log( efix1/(len1-read_size+1) | 
|  | 83 			          + efix2/(len2-read_size+1) ) | 
|  | 84 			          + snum1*log(efix1/(len1-read_size+1)) | 
|  | 85 				  + snum2*log(efix2/(len2-read_size+1)); | 
|  | 86 				  //cout<<LL<<endl; | 
|  | 87 				  //cout<<efix1<<"\t"  <<pairs.size()<<"\t"  <<len2<<"\t"	<<endl; | 
|  | 88 		         //int num1=0, num2=0,num12=0; | 
|  | 89 		         for(int i=0;i<pairs.size();i++) | 
|  | 90 			  { | 
|  | 91                             if(pairs[i].IsoFlag[0] == 1) | 
|  | 92 			    { | 
|  | 93 				    if(pairs[i].IsoFlag[1] == 0) | 
|  | 94 				    { | 
|  | 95 					   // num1++; | 
|  | 96 					    float tmp=FZM[pairs[i].Size[0]]*efix1/(len1-pairs[i].Size[0]+1); | 
|  | 97 					    if (tmp == 0) | 
|  | 98 					    { | 
|  | 99 					    	 LL = LL - 308; | 
|  | 100 					    } | 
|  | 101 					    else | 
|  | 102 					    { | 
|  | 103 						 LL = LL + log(tmp); | 
|  | 104 					    } | 
|  | 105 				    } | 
|  | 106 				    else | 
|  | 107 				    { | 
|  | 108 					   // num12++; | 
|  | 109 					    float tmp=(FZM[pairs[i].Size[0]]*efix1/(len1-pairs[i].Size[0]+1)) | 
|  | 110 					          + (FZM[pairs[i].Size[1]]*efix2/(len2-pairs[i].Size[1]+1)); | 
|  | 111 				  	    if(tmp ==0) | 
|  | 112 					    { | 
|  | 113 					    LL = LL -308; | 
|  | 114 					    } | 
|  | 115 					    else | 
|  | 116 					    { | 
|  | 117 						    LL = LL + log(tmp); | 
|  | 118 					    } | 
|  | 119 				    } | 
|  | 120 			    } | 
|  | 121 			    else | 
|  | 122 			    { | 
|  | 123 				    //num2++; | 
|  | 124 				    float tmp = FZM[pairs[i].Size[1]]*efix2/(len2-pairs[i].Size[1]+1); | 
|  | 125 				    if(tmp ==0) | 
|  | 126 				    { | 
|  | 127 				    LL= LL - 308; | 
|  | 128 				    } | 
|  | 129 				    else | 
|  | 130 				    { | 
|  | 131 					    LL = LL +log(tmp); | 
|  | 132 				    } | 
|  | 133 			    } | 
|  | 134 			  } | 
|  | 135 			  //cout<<num1<<"\t"<<num2<<"\t"<<num12<<"\n"; | 
|  | 136 			  //cout<<LL<<endl; | 
|  | 137 | 
|  | 138 			  if(IRM["CA"].size() > 0) | 
|  | 139 			  { | 
|  | 140 				  float BLL; | 
|  | 141 				  string eventtype =id.substr(0,2); | 
|  | 142 				  if(!eventtype.compare("ME")) | 
|  | 143 				  { | 
|  | 144 					  eventtype = "CA"; | 
|  | 145 				  } | 
|  | 146 				  BLL = LL + log(IRM[eventtype][int(e1/0.001)]); | 
|  | 147 				  if(BLL > BMax) | 
|  | 148 				  { | 
|  | 149 					  BMax = BLL; | 
|  | 150 					  BMaxe = e1; | 
|  | 151 				  } | 
|  | 152 			  } | 
|  | 153 				  //cout<<LL<<endl; | 
|  | 154 			  if(LL > Max) | 
|  | 155 			  { | 
|  | 156 				  Max= LL; | 
|  | 157 				  Maxe = e1; | 
|  | 158 | 
|  | 159 			  } | 
|  | 160 | 
|  | 161 		 } | 
|  | 162 		 ir= Maxe; | 
|  | 163 		 bir= BMaxe; | 
|  | 164 	 } | 
|  | 165 | 
|  | 166 	 TXdb_entry(char *line) | 
|  | 167 	 { | 
|  | 168 		 size_t found; | 
|  | 169 		 string linestr(line); | 
|  | 170 		 found = linestr.find("[L]"); | 
|  | 171 		 if(found != string::npos) | 
|  | 172 		 { | 
|  | 173 			 iscomplete=1; | 
|  | 174 | 
|  | 175 		 	char *token; | 
|  | 176 | 
|  | 177 		 	token = strtok(line,"\t"); | 
|  | 178 		 	if(token==NULL) | 
|  | 179 		 	{ | 
|  | 180 				 cout<<"error1"<<endl; | 
|  | 181 				 exit(0); | 
|  | 182 			 } | 
|  | 183 			 else | 
|  | 184 			 { | 
|  | 185 				 chrid = token; | 
|  | 186 			 } | 
|  | 187 			 token = strtok(NULL,"\t"); | 
|  | 188 			 if(token==NULL) | 
|  | 189 			 { | 
|  | 190 				 cout<<"error2"<<endl; | 
|  | 191 				 exit(0); | 
|  | 192 			 } | 
|  | 193 			 else | 
|  | 194 			 { | 
|  | 195 				 sscanf(token,"%d",&start); | 
|  | 196 			 } | 
|  | 197 			 token = strtok(NULL,"\t"); | 
|  | 198         	         if(token==NULL) | 
|  | 199                 	 { | 
|  | 200                         	 cout<<"error3"<<endl; | 
|  | 201 	                         exit(0); | 
|  | 202         	         } | 
|  | 203                 	 else | 
|  | 204 	                 { | 
|  | 205         	                 sscanf(token,"%d",&end); | 
|  | 206 	                 } | 
|  | 207 			 token = strtok(NULL,"\t"); | 
|  | 208 			 if(token==NULL) | 
|  | 209 			 { | 
|  | 210 				 cout<<"error4"<<endl;exit(0); | 
|  | 211 			 } | 
|  | 212 			 else | 
|  | 213 			 { | 
|  | 214 				 id = token; | 
|  | 215 				 id = id.substr(0,id.size()-3); | 
|  | 216 			 } | 
|  | 217 			 token = strtok(NULL,"\t"); | 
|  | 218 			 token = strtok(NULL,"\t"); | 
|  | 219 			 if(token==NULL) | 
|  | 220 			 { | 
|  | 221 				  cout<<"error5"<<endl;exit(0); | 
|  | 222 			 } | 
|  | 223 			 else | 
|  | 224 			 { | 
|  | 225 				 sscanf(token,"%c",&strand); | 
|  | 226 			 } | 
|  | 227 			 for(int i=0;i<5;i++) | 
|  | 228 			 { | 
|  | 229 				 token=strtok(NULL,"\t"); | 
|  | 230 			 } | 
|  | 231 			 if(token==NULL){ | 
|  | 232 				 cout<<"error6"<<endl;exit(0); | 
|  | 233 			 } | 
|  | 234 			 else | 
|  | 235 			 { | 
|  | 236 				 string str_tmp(token); | 
|  | 237 				 size_t found_tmp; | 
|  | 238 				 int *pexonsize = exonsize; | 
|  | 239 				 while((found_tmp = str_tmp.find(",")) != string::npos){ | 
|  | 240 					 sscanf((str_tmp.substr(0,found_tmp)).c_str(),"%d",pexonsize); | 
|  | 241 					 pexonsize++; | 
|  | 242 					 str_tmp.erase(0,found_tmp+1); | 
|  | 243 				 } | 
|  | 244 				 sscanf(str_tmp.c_str(),"%d",pexonsize); | 
|  | 245 | 
|  | 246 			 } | 
|  | 247 			 token=strtok(NULL,"\t"); | 
|  | 248 | 
|  | 249 			if(token==NULL){ | 
|  | 250 				 cout<<"error7"<<endl;exit(0); | 
|  | 251 			 } | 
|  | 252 			 else | 
|  | 253 			 { | 
|  | 254 				 string str_tmp(token); | 
|  | 255 				 size_t found_tmp; | 
|  | 256 				 int *pexonstarts = exonstarts; | 
|  | 257 				 while((found_tmp = str_tmp.find(",")) != string::npos){ | 
|  | 258 					 sscanf((str_tmp.substr(0,found_tmp)).c_str(),"%d",pexonstarts); | 
|  | 259 					 pexonstarts++; | 
|  | 260 					 str_tmp.erase(0,found_tmp+1); | 
|  | 261 				 } | 
|  | 262 				 sscanf(str_tmp.c_str(),"%d",pexonstarts); | 
|  | 263 | 
|  | 264 			 } | 
|  | 265 		 len1 = exonsize[0]+exonsize[1]+exonsize[2]; | 
|  | 266 			 len2 = exonsize[0]+exonsize[2]; | 
|  | 267 			 snum12=0; | 
|  | 268 			 snum1 =0; | 
|  | 269 			 snum2 =0; | 
|  | 270 			 enum1=0; | 
|  | 271 			 enum2=0; | 
|  | 272 			 enum3=0; | 
|  | 273 			 jnum12=0; | 
|  | 274 			 jnum13=0; | 
|  | 275 			 jnum23=0; | 
|  | 276 			 totalreadnum=0; | 
|  | 277 		 } | 
|  | 278 		else | 
|  | 279 		{ | 
|  | 280 			iscomplete = 0; | 
|  | 281 		} | 
|  | 282 	 } | 
|  | 283 }; | 
|  | 284 | 
|  | 285 class ReadInfo | 
|  | 286 { | 
|  | 287 	public: | 
|  | 288 	bool isUseful; | 
|  | 289 	char *pline; | 
|  | 290 	void scan(map<string, vector<int> > &map_pos ) | 
|  | 291 	//eventname, pos in L/S iso | 
|  | 292 	{ | 
|  | 293 		char *token; | 
|  | 294 		token = strtok(pline, "\t"); | 
|  | 295 		token = strtok(NULL, "\t"); | 
|  | 296 		token = strtok(NULL, "\t"); | 
|  | 297 		token = strtok(NULL, "\t"); | 
|  | 298 		string mapinfo(token); | 
|  | 299 		size_t  beg, end; | 
|  | 300 		end = 0; | 
|  | 301 		while( (beg= mapinfo.find_first_of("/", end))!=string::npos) | 
|  | 302 		{ | 
|  | 303 			beg ++; | 
|  | 304 			end = mapinfo.find_first_of("[",beg); | 
|  | 305 			string eventid = mapinfo.substr( beg, end-beg ) ; | 
|  | 306 			vector<int> pos(2,-1); | 
|  | 307 			if(!map_pos.count(eventid)) | 
|  | 308 			{ | 
|  | 309 				map_pos.insert(pair<string, vector<int> > | 
|  | 310 					(eventid, pos) ); | 
|  | 311 			} | 
|  | 312 			end ++; | 
|  | 313 			string LorS = mapinfo.substr( end,1 ); | 
|  | 314 			beg = mapinfo.find_first_of(":",end ); | 
|  | 315 			beg ++; | 
|  | 316 			end = mapinfo.find_first_of(",",beg ); | 
|  | 317 			string pos_str(mapinfo.substr(beg, end-2-beg)); | 
|  | 318 | 
|  | 319 			if(LorS.compare("L") == 0) | 
|  | 320 			{ | 
|  | 321 				sscanf(pos_str.c_str(),"%d",&(map_pos[eventid][0]) ); | 
|  | 322 			} | 
|  | 323 			else | 
|  | 324 			{ | 
|  | 325 				sscanf(pos_str.c_str(),"%d",&(map_pos[eventid][1]) ); | 
|  | 326 			} | 
|  | 327 | 
|  | 328 			//cout<<eventid<<"\t"<<map_pos[eventid][0]<<"\t"<<map_pos[eventid][1]<<endl; | 
|  | 329 		} | 
|  | 330 | 
|  | 331 	} | 
|  | 332 	ReadInfo(char *line) | 
|  | 333 	{ | 
|  | 334 		string line_str(line); | 
|  | 335 		string line_str_last2=line_str.substr(line_str.size()-2,2); | 
|  | 336 		if(line_str_last2.compare("NM") == 0 ) | 
|  | 337 		{ | 
|  | 338 			isUseful = 0; | 
|  | 339 		} | 
|  | 340 		else if(line_str_last2.compare("MT") == 0 ) | 
|  | 341 		{ | 
|  | 342 			isUseful = 0; | 
|  | 343 		} | 
|  | 344 		else | 
|  | 345 		{ | 
|  | 346 			isUseful = 1; | 
|  | 347 			pline = line; | 
|  | 348 		} | 
|  | 349 	} | 
|  | 350 }; | 
|  | 351 | 
|  | 352 | 
|  | 353 | 
|  | 354 class ReadPair | 
|  | 355 { | 
|  | 356 	public: | 
|  | 357 	map<string, ReadPairInfo> mappinginfo; | 
|  | 358 	ReadPair(map<string, vector<int> > &read1_mappos, map<string, vector<int> > &read2_mappos, int &read_size) | 
|  | 359 	{ | 
|  | 360 		map<string, vector<int> >::iterator it1 = read1_mappos.begin(); | 
|  | 361 		for(;it1!=read1_mappos.end();it1++) | 
|  | 362 		{ | 
|  | 363 			ReadPairInfo ReadPairInfo_tmp; | 
|  | 364 			ReadPairInfo_tmp.IsoFlag.resize(2); | 
|  | 365 			ReadPairInfo_tmp.Size.resize(2); | 
|  | 366 			//ReadPairInfo_tmp.Pos.resize(4); | 
|  | 367 			ReadPairInfo_tmp.Pos.assign(4,-1); | 
|  | 368 			string eventid = it1->first; | 
|  | 369 			if(read2_mappos.count(eventid) > 0) | 
|  | 370 			{ | 
|  | 371 				ReadPairInfo_tmp.isPaired = 0; | 
|  | 372 				for(int i=0;i<2;i++) | 
|  | 373 				{ | 
|  | 374 					//cout<<read1_mappos[eventid][i]<<endl; | 
|  | 375 					if(read2_mappos[eventid][i] == -1 || read1_mappos[eventid][i] == -1) | 
|  | 376 					{ | 
|  | 377 						ReadPairInfo_tmp.IsoFlag[i] = 0; | 
|  | 378 						ReadPairInfo_tmp.Size[i]    = 0; | 
|  | 379 						//discard reads like this | 
|  | 380 					} | 
|  | 381 					else | 
|  | 382 					{ | 
|  | 383 | 
|  | 384 						ReadPairInfo_tmp.isPaired = 1; | 
|  | 385 						ReadPairInfo_tmp.IsoFlag[i] = 1; | 
|  | 386 						ReadPairInfo_tmp.Size[i]    = abs(read2_mappos[eventid][i] | 
|  | 387 								-read1_mappos[eventid][i]) | 
|  | 388 								+read_size; | 
|  | 389 						ReadPairInfo_tmp.Pos[2*i]    = read1_mappos[eventid][i]; | 
|  | 390 						ReadPairInfo_tmp.Pos[2*i+1]    = read2_mappos[eventid][i]; | 
|  | 391 					} | 
|  | 392 				} | 
|  | 393 				mappinginfo.insert (pair<string, ReadPairInfo>(eventid, ReadPairInfo_tmp)); | 
|  | 394 			} | 
|  | 395 			else | 
|  | 396 			{ | 
|  | 397 				ReadPairInfo_tmp.isPaired = 0; | 
|  | 398 				for(int i=0;i<2;i++) | 
|  | 399 				{ | 
|  | 400 					ReadPairInfo_tmp.Pos[2*i]    = read1_mappos[eventid][i]; | 
|  | 401 					ReadPairInfo_tmp.Size[i] = 0; | 
|  | 402 					if(read1_mappos[eventid][i] == -1) | 
|  | 403 					{ | 
|  | 404 						ReadPairInfo_tmp.IsoFlag[i] = 0; | 
|  | 405 | 
|  | 406 					} | 
|  | 407 					else | 
|  | 408 					{ | 
|  | 409 						ReadPairInfo_tmp.IsoFlag[i] = 1; | 
|  | 410 					} | 
|  | 411 				} | 
|  | 412 				mappinginfo.insert (pair<string, ReadPairInfo>(eventid, ReadPairInfo_tmp)); | 
|  | 413 			} | 
|  | 414 		} | 
|  | 415 		map<string, vector<int> >::iterator it2 = read2_mappos.begin(); | 
|  | 416 		for(;it2!=read2_mappos.end();it2++) | 
|  | 417 		{ | 
|  | 418 			string eventid = it2->first; | 
|  | 419 			if(mappinginfo.count(eventid)>0) | 
|  | 420 			{ | 
|  | 421 				continue; | 
|  | 422 			} | 
|  | 423 			ReadPairInfo ReadPairInfo_tmp; | 
|  | 424 			ReadPairInfo_tmp.IsoFlag.resize(2); | 
|  | 425 			ReadPairInfo_tmp.Size.resize(2); | 
|  | 426 			//ReadPairInfo_tmp.Pos.resize(4); | 
|  | 427 			ReadPairInfo_tmp.Pos.assign(4,-1); | 
|  | 428 			if(read1_mappos.count(eventid) > 0) | 
|  | 429 			{ | 
|  | 430 				ReadPairInfo_tmp.isPaired = 0; | 
|  | 431 				for(int i=0;i<2;i++) | 
|  | 432 				{ | 
|  | 433 					if(read2_mappos[eventid][i] == -1 || read1_mappos[eventid][i] == -1) | 
|  | 434 					{ | 
|  | 435 						ReadPairInfo_tmp.IsoFlag[i] = 0; | 
|  | 436 						ReadPairInfo_tmp.Size[i]    = 0; | 
|  | 437 					} | 
|  | 438 					else | 
|  | 439 					{ | 
|  | 440 						ReadPairInfo_tmp.isPaired = 1; | 
|  | 441 						ReadPairInfo_tmp.IsoFlag[i] = 1; | 
|  | 442 						ReadPairInfo_tmp.Size[i]    = abs(read2_mappos[eventid][i] | 
|  | 443 								-read1_mappos[eventid][i] | 
|  | 444 								+read_size); | 
|  | 445 						ReadPairInfo_tmp.Pos[2*i] = read1_mappos[eventid][i]; | 
|  | 446 						ReadPairInfo_tmp.Pos[2*i+1]=read2_mappos[eventid][i]; | 
|  | 447 					} | 
|  | 448 				} | 
|  | 449 				mappinginfo.insert (pair<string, ReadPairInfo>(eventid, ReadPairInfo_tmp)); | 
|  | 450 			} | 
|  | 451 			else | 
|  | 452 			{ | 
|  | 453 				ReadPairInfo_tmp.isPaired = 0; | 
|  | 454 				for(int i=0;i<2;i++) | 
|  | 455 				{ | 
|  | 456 					ReadPairInfo_tmp.Pos[2*i+1]=read2_mappos[eventid][i]; | 
|  | 457 					ReadPairInfo_tmp.Size[i] = 0; | 
|  | 458 					if(read2_mappos[eventid][i] == -1) | 
|  | 459 					{ | 
|  | 460 						ReadPairInfo_tmp.IsoFlag[i] = 0; | 
|  | 461 					} | 
|  | 462 					else | 
|  | 463 					{ | 
|  | 464 						ReadPairInfo_tmp.IsoFlag[i] = 1; | 
|  | 465 					} | 
|  | 466 				} | 
|  | 467 				mappinginfo.insert (pair<string, ReadPairInfo>(eventid, ReadPairInfo_tmp)); | 
|  | 468 			} | 
|  | 469 | 
|  | 470 		} | 
|  | 471 	} | 
|  | 472 }; | 
|  | 473 | 
|  | 474 int main (int argc, char **argv) { | 
|  | 475     int next_option; | 
|  | 476     int digit_optind = 0; | 
|  | 477     int read_size = 36; | 
|  | 478     char *dbfile_name = 0, | 
|  | 479     	 *IRM_file_name = 0, | 
|  | 480 	 *FZM_file_name = 0, | 
|  | 481 	 *output_file_prefix = 0, | 
|  | 482 	 *read1_nomt_file_name = 0, | 
|  | 483 	 *read2_nomt_file_name = 0; | 
|  | 484 | 
|  | 485     while ( (next_option = getopt(argc, argv, "s:d:b:f:o:1:2:")) != -1) { | 
|  | 486         int this_option_optind = optind ? optind : 1; | 
|  | 487         switch (next_option) { | 
|  | 488 	case 's':	sscanf (optarg,"%d",&read_size);	break; | 
|  | 489         case 'd':	dbfile_name = optarg;	break; | 
|  | 490         case 'b':	IRM_file_name = optarg;	break; | 
|  | 491         case 'f':	FZM_file_name  = optarg;	break; | 
|  | 492 	case 'o':	output_file_prefix = optarg;	break; | 
|  | 493         case '1':	read1_nomt_file_name = optarg;	break; | 
|  | 494         case '2':	read2_nomt_file_name = optarg;	break; | 
|  | 495 	default:	break; | 
|  | 496 	} | 
|  | 497       } | 
|  | 498     if (!read_size|| | 
|  | 499 	    !dbfile_name|| | 
|  | 500 	    !FZM_file_name|| | 
|  | 501 	    !output_file_prefix|| | 
|  | 502 	    !read1_nomt_file_name|| | 
|  | 503 	    !read2_nomt_file_name) | 
|  | 504       { | 
|  | 505 	  printusage(); | 
|  | 506 	  return 0; | 
|  | 507       } | 
|  | 508       clock_t clock_start, clock_finish; | 
|  | 509       clock_start=clock(); | 
|  | 510       string ratio_file_name = output_file_prefix; | 
|  | 511       ratio_file_name.append(".ratio"); | 
|  | 512       string log_file_name = output_file_prefix; | 
|  | 513       log_file_name.append(".log"); | 
|  | 514       string num_file_name = output_file_prefix; | 
|  | 515       num_file_name.append(".nums"); | 
|  | 516 | 
|  | 517       ofstream log_file(log_file_name.c_str()); | 
|  | 518       cout<<"Log file name:\t"<<log_file_name<<endl; | 
|  | 519       cout<<"Parameters:"<<endl; | 
|  | 520       cout<<"Read size:\t"<<read_size<<endl; | 
|  | 521       log_file<<"#read_size:\t"<<read_size<<endl; | 
|  | 522       cout<<"Database:\t"<<dbfile_name<<endl; | 
|  | 523       log_file<<"#dbfile_name:\t"<<dbfile_name<<endl; | 
|  | 524       cout<<"Fragment size file name:\t"<<FZM_file_name<<endl; | 
|  | 525       log_file<<"#FZM_file_name:\t"<<FZM_file_name<<endl; | 
|  | 526       cout<<"Output file prefix:\t"<<output_file_prefix<<endl; | 
|  | 527       cout<<"Read end1 file:\t"<<read1_nomt_file_name<<endl; | 
|  | 528       cout<<"Read end2 file:\t"<<read2_nomt_file_name<<endl; | 
|  | 529       log_file<<"#read1_nomt_file_name:\t"<<read1_nomt_file_name<<endl; | 
|  | 530       log_file<<"#read2_nomt_file_name:\t"<<read2_nomt_file_name<<endl; | 
|  | 531       if(IRM_file_name) | 
|  | 532       { | 
|  | 533 	      cout<<"Inclusion ratio distn. file:\t"<<IRM_file_name<<endl; | 
|  | 534 	      log_file<<"#IRM_file_name:\t"<<IRM_file_name<<endl; | 
|  | 535 | 
|  | 536       } | 
|  | 537       else | 
|  | 538       { | 
|  | 539 	      cout<<"Inclusion ratio distn. file:\tNA"<<endl; | 
|  | 540 	      log_file<<"#IRM_file_name:\tNA"<<endl; | 
|  | 541       } | 
|  | 542 | 
|  | 543       //start reading files, dbfile first | 
|  | 544       cout<<"Reading database file..."<<endl; | 
|  | 545       map<string, TXdb_entry> TXdb_entries; | 
|  | 546       ifstream dbfile(dbfile_name); | 
|  | 547       char dbfile_line[MAX_LINE_LEN]; | 
|  | 548       while(dbfile.getline(dbfile_line,MAX_LINE_LEN)){ | 
|  | 549 	      TXdb_entry entry_tmp(dbfile_line); | 
|  | 550 	      if(entry_tmp.iscomplete) | 
|  | 551 	      { | 
|  | 552 		TXdb_entries.insert(pair<string, TXdb_entry> | 
|  | 553 				(entry_tmp.id,entry_tmp) | 
|  | 554 				); | 
|  | 555 	      	//cout<<entry_tmp.exonsize[1]<<endl; | 
|  | 556 	      } | 
|  | 557 | 
|  | 558       } | 
|  | 559       dbfile.close(); | 
|  | 560 | 
|  | 561       //start reading bayes prior if there is | 
|  | 562       map<string, vector< double > >  IRM; | 
|  | 563       IRM.insert(pair<string, vector<double> > ("CA", vector<double>())); | 
|  | 564       IRM.insert(pair<string, vector<double> > ("CS", vector<double>())); | 
|  | 565       IRM.insert(pair<string, vector<double> > ("IR", vector<double>())); | 
|  | 566       IRM.insert(pair<string, vector<double> > ("AA", vector<double>())); | 
|  | 567       IRM.insert(pair<string, vector<double> > ("AD", vector<double>())); | 
|  | 568       //IRM.insert(pair<string, vector<double> > ("AI", vector<double>())); | 
|  | 569       if(IRM_file_name) | 
|  | 570       { | 
|  | 571 	cout<<"Reading inclusion ratio distn. file..."<<endl; | 
|  | 572 	ifstream IRM_file(IRM_file_name); | 
|  | 573 	char IRM_line[MAX_LINE_LEN]; | 
|  | 574 	IRM_file.getline(IRM_line, MAX_LINE_LEN); | 
|  | 575 	while(IRM_file.getline(IRM_line, MAX_LINE_LEN)) | 
|  | 576 	{ | 
|  | 577 		char *token; | 
|  | 578 		token = strtok(IRM_line, "\t"); | 
|  | 579 		if (token != NULL) { | 
|  | 580 			double hist_tmp; | 
|  | 581 			sscanf(token,"%lf",&hist_tmp); | 
|  | 582 			IRM["CA"].push_back(hist_tmp); | 
|  | 583 		} | 
|  | 584 		token = strtok(NULL,"\t"); | 
|  | 585 		if (token != NULL) { | 
|  | 586 			double hist_tmp; | 
|  | 587 			sscanf(token,"%lf",&hist_tmp); | 
|  | 588 			IRM["AD"].push_back(hist_tmp); | 
|  | 589 		} | 
|  | 590 		token = strtok(NULL,"\t"); | 
|  | 591 		if (token != NULL) { | 
|  | 592 			double hist_tmp; | 
|  | 593 			sscanf(token,"%lf",&hist_tmp); | 
|  | 594 			IRM["AA"].push_back(hist_tmp); | 
|  | 595 		} | 
|  | 596 /*		token = strtok(NULL,"\t"); | 
|  | 597 		if (token != NULL) { | 
|  | 598 			double hist_tmp; | 
|  | 599 			sscanf(token,"%lf",&hist_tmp); | 
|  | 600 			IRM["AI"].push_back(hist_tmp); | 
|  | 601 		}*/ | 
|  | 602 		token = strtok(NULL,"\t"); | 
|  | 603 		if (token != NULL) { | 
|  | 604 			double hist_tmp; | 
|  | 605 			sscanf(token,"%lf",&hist_tmp); | 
|  | 606 			IRM["IR"].push_back(hist_tmp); | 
|  | 607 		} | 
|  | 608 		token = strtok(NULL,"\t"); | 
|  | 609 		if (token != NULL) { | 
|  | 610 			double hist_tmp; | 
|  | 611 			sscanf(token,"%lf",&hist_tmp); | 
|  | 612 			IRM["CS"].push_back(hist_tmp); | 
|  | 613 		} | 
|  | 614 | 
|  | 615 	} | 
|  | 616 	IRM_file.close(); | 
|  | 617       } | 
|  | 618 | 
|  | 619       //read FZM | 
|  | 620       cout<<"Reading fragment size distn. file..."<<endl; | 
|  | 621       ifstream FZM_file(FZM_file_name); | 
|  | 622       vector<double> FZM; | 
|  | 623       char FZM_file_line[MAX_LINE_LEN]; | 
|  | 624       FZM_file.getline(FZM_file_line,MAX_LINE_LEN); | 
|  | 625       while(FZM_file.getline (FZM_file_line,MAX_LINE_LEN) ) | 
|  | 626       { | 
|  | 627 	     double hist_tmp; | 
|  | 628 	     sscanf(FZM_file_line, "%lf", &hist_tmp) ; | 
|  | 629 	     FZM.push_back(hist_tmp); | 
|  | 630       } | 
|  | 631       FZM_file.close(); | 
|  | 632       //start to read reads | 
|  | 633       cout<<"Reading reads from both ends...."<<endl; | 
|  | 634       ifstream read1_nomt_file(read1_nomt_file_name); | 
|  | 635       ifstream read2_nomt_file(read2_nomt_file_name); | 
|  | 636       char read1_line[MAX_LINE_LEN]; | 
|  | 637       char read2_line[MAX_LINE_LEN]; | 
|  | 638       int total_single_read_num=0; | 
|  | 639       int total_pair_read_num=0; | 
|  | 640       int total_read_num=0; | 
|  | 641       while(read1_nomt_file.getline(read1_line, MAX_LINE_LEN)) | 
|  | 642       { | 
|  | 643 	      total_read_num++; | 
|  | 644 	      ReadInfo read1(read1_line); | 
|  | 645 	      map<string, vector<int> > read1_mappos; | 
|  | 646 	      if(read1.isUseful) | 
|  | 647 	      { | 
|  | 648 	        read1.scan(read1_mappos); | 
|  | 649 	      } | 
|  | 650 	      read2_nomt_file.getline(read2_line, MAX_LINE_LEN); | 
|  | 651 	      ReadInfo read2(read2_line); | 
|  | 652 	      map<string, vector<int> > read2_mappos; | 
|  | 653 	      if(read2.isUseful) | 
|  | 654 	      { | 
|  | 655 		read2.scan(read2_mappos); | 
|  | 656 	      } | 
|  | 657 	      //pairing | 
|  | 658 	      ReadPair read_pair_ins (read1_mappos, read2_mappos, read_size); | 
|  | 659 	      //feed them into TXdb_entries | 
|  | 660 	      map<string , ReadPairInfo>::iterator it = read_pair_ins.mappinginfo.begin(); | 
|  | 661 	      for(; it!=read_pair_ins.mappinginfo.end();it++) | 
|  | 662 	      { | 
|  | 663 		      string eventid = it->first; | 
|  | 664 		      map<string, TXdb_entry>::iterator txdb_it = TXdb_entries.find(eventid); | 
|  | 665 		      if(txdb_it == TXdb_entries.end() ) | 
|  | 666 		      { | 
|  | 667 			      continue; | 
|  | 668 		      } | 
|  | 669 		      //get numbers | 
|  | 670 		      for(int i=0; i<4;i++) | 
|  | 671 		      { | 
|  | 672 			      if(read_pair_ins.mappinginfo[eventid].Pos[i]!=-1) | 
|  | 673 			      { | 
|  | 674 				      int pos_tmp1 = read_pair_ins.mappinginfo[eventid].Pos[i]; | 
|  | 675 				      int pos_tmp2; | 
|  | 676 				      if(i<2) | 
|  | 677 				      { | 
|  | 678 					      if (txdb_it->second.strand == '-') | 
|  | 679 						      pos_tmp1 = txdb_it->second.len1 -pos_tmp1 +1- read_size; | 
|  | 680 					      pos_tmp2 = pos_tmp1 + read_size; | 
|  | 681 					      if (pos_tmp2 <= txdb_it->second.exonsize[0]) | 
|  | 682 					      {   txdb_it->second.enum1++;continue;} | 
|  | 683 					      if (pos_tmp1 <= txdb_it->second.exonsize[0] ) | 
|  | 684 					      { | 
|  | 685 						     if(read_pair_ins.mappinginfo[eventid].IsoFlag[1] !=1) | 
|  | 686 					             { | 
|  | 687 						      txdb_it->second.jnum12++; | 
|  | 688 					             } | 
|  | 689 					             else | 
|  | 690 					             { | 
|  | 691 						      txdb_it->second.enum1++; | 
|  | 692 					             } | 
|  | 693 						     continue; | 
|  | 694 					      } | 
|  | 695 					      if (pos_tmp2 <= txdb_it->second.exonsize[0]+txdb_it->second.exonsize[1]) | 
|  | 696                                               {   txdb_it->second.enum2++;continue;} | 
|  | 697 					      if (pos_tmp1 <= txdb_it->second.exonsize[0]+txdb_it->second.exonsize[1]) | 
|  | 698 					      { | 
|  | 699 						     if(read_pair_ins.mappinginfo[eventid].IsoFlag[1] !=1) | 
|  | 700 					             { | 
|  | 701 						      txdb_it->second.jnum23++; | 
|  | 702 					             } | 
|  | 703 					             else | 
|  | 704 					             { | 
|  | 705 						      txdb_it->second.enum2++; | 
|  | 706 					             } | 
|  | 707 						     continue; | 
|  | 708 | 
|  | 709 					      } | 
|  | 710 					      txdb_it->second.enum3++; | 
|  | 711 				      } | 
|  | 712 				      else | 
|  | 713 				      { | 
|  | 714 					      if (read_pair_ins.mappinginfo[eventid].IsoFlag[0] == 1 | 
|  | 715 						      && read_pair_ins.mappinginfo[eventid].IsoFlag[1] == 1) | 
|  | 716 						     continue; | 
|  | 717 					      if (txdb_it->second.strand == '-') | 
|  | 718 						      pos_tmp1 = txdb_it->second.len2 -pos_tmp1 +1- read_size; | 
|  | 719 					      pos_tmp2 = pos_tmp1 + read_size; | 
|  | 720 					      if (pos_tmp2 <= txdb_it->second.exonsize[0]) | 
|  | 721 					      {   txdb_it->second.enum1++;continue;} | 
|  | 722 					      if (pos_tmp1 <= txdb_it->second.exonsize[0] ) | 
|  | 723 					      { | 
|  | 724 						     if(read_pair_ins.mappinginfo[eventid].IsoFlag[0] !=1) | 
|  | 725 					             { | 
|  | 726 						      txdb_it->second.jnum13++; | 
|  | 727 					             } | 
|  | 728 					             else | 
|  | 729 					             { | 
|  | 730 						      txdb_it->second.enum1++; | 
|  | 731 					             } | 
|  | 732 						     continue; | 
|  | 733 					      } | 
|  | 734 					      txdb_it->second.enum3++; | 
|  | 735 | 
|  | 736 | 
|  | 737 				      } | 
|  | 738 			      } | 
|  | 739 		      } | 
|  | 740 | 
|  | 741 		      if(read_pair_ins.mappinginfo[eventid].isPaired) | 
|  | 742 		      { | 
|  | 743 			      total_pair_read_num += 2; | 
|  | 744 			      txdb_it->second.totalreadnum += 2; | 
|  | 745 			      (txdb_it->second).pairs.push_back(read_pair_ins.mappinginfo[eventid]); | 
|  | 746 		      } | 
|  | 747 		      else | 
|  | 748 		      { | 
|  | 749 			      total_single_read_num += 2; | 
|  | 750 			      txdb_it->second.totalreadnum += 1; | 
|  | 751 			      if(read_pair_ins.mappinginfo[eventid].IsoFlag[0] == 1) | 
|  | 752 			      { | 
|  | 753 				      if(read_pair_ins.mappinginfo[eventid].IsoFlag[1] == 0) | 
|  | 754 				      { | 
|  | 755 			                (txdb_it->second).snum1++; | 
|  | 756 				      } | 
|  | 757 				      else | 
|  | 758 				      { | 
|  | 759 					(txdb_it->second).snum12++; | 
|  | 760 				      } | 
|  | 761 			      } | 
|  | 762 			      else | 
|  | 763 			      { | 
|  | 764 				      (txdb_it->second).snum2++; | 
|  | 765 			      } | 
|  | 766 		      } | 
|  | 767 	      } | 
|  | 768 | 
|  | 769 | 
|  | 770       } | 
|  | 771 | 
|  | 772       read1_nomt_file.close(); | 
|  | 773       read2_nomt_file.close(); | 
|  | 774       cout<<"Total read number:\t"<<total_read_num*2<<endl; | 
|  | 775       cout<<"Singled reads used:\t"<<total_single_read_num<<endl; | 
|  | 776       cout<<"Paired reads used:\t"<<total_pair_read_num<<endl; | 
|  | 777 | 
|  | 778       log_file<<"#total_read_num:\t"<<total_read_num*2<<endl; | 
|  | 779       log_file<<"#total_single_read_num:\t"<<total_single_read_num<<endl; | 
|  | 780       log_file<<"#total_pair_read_num:\t"<<total_pair_read_num<<endl; | 
|  | 781 | 
|  | 782       //Calculate ratios and output | 
|  | 783       cout<<"Now doing estimations and output to "<<ratio_file_name<<"\t"<<num_file_name<<endl; | 
|  | 784       ofstream ratio_file(ratio_file_name.c_str()); | 
|  | 785       ofstream num_file(num_file_name.c_str()); | 
|  | 786       map<string, TXdb_entry>::iterator txdb_it = TXdb_entries.begin(); | 
|  | 787       for(;txdb_it!=TXdb_entries.end();txdb_it++) | 
|  | 788       { | 
|  | 789 	      string eventid = txdb_it->first; | 
|  | 790 	      //cout<< eventid <<endl; | 
|  | 791 	      txdb_it->second.estimate(FZM,IRM,read_size); | 
|  | 792 /*	      ratio_file<< eventid<<"\t" | 
|  | 793 	      		<<setprecision(4)<<fixed | 
|  | 794 	                <<txdb_it->second.ir<<"\t" | 
|  | 795 			<<txdb_it->second.bir<<"\t" | 
|  | 796 			<<"NA\tNA\tNA\t" | 
|  | 797 			<<txdb_it->second.jnum12<<"\t" | 
|  | 798 			<<txdb_it->second.jnum23<<"\t" | 
|  | 799 			<<txdb_it->second.jnum13<<"\t" | 
|  | 800 			<<(1.0*read_size*( txdb_it->second.enum1+txdb_it->second.jnum12 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[0] )<<"\t" | 
|  | 801 			<<(1.0*read_size*( txdb_it->second.enum2+txdb_it->second.jnum12 +txdb_it->second.jnum23 )/txdb_it->second.exonsize[1] )<<"\t" | 
|  | 802 			<<(1.0*read_size*( txdb_it->second.enum3+txdb_it->second.jnum23 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[2] )<<"\t" | 
|  | 803 			<<txdb_it->second.exonsize[0]<<"\t" | 
|  | 804 			<<txdb_it->second.exonsize[1]<<"\t" | 
|  | 805 			<<txdb_it->second.exonsize[2]<<"\t" | 
|  | 806 		<<(1.0*read_size*txdb_it->second.totalreadnum/txdb_it->second.len1)<<endl; | 
|  | 807 		*/ | 
|  | 808 	      ratio_file<<eventid<<"\t" | 
|  | 809 	      		<<setprecision(4)<<fixed | 
|  | 810 			<<txdb_it->second.ir<<"\t"  //1 | 
|  | 811 			<<txdb_it->second.bir<<"\t" //2 | 
|  | 812 			<<txdb_it->second.chrid<<"\t" //3 | 
|  | 813 			<<txdb_it->second.start<<"," // | 
|  | 814 			<<txdb_it->second.start+txdb_it->second.exonsize[0]<<"\t" //4 | 
|  | 815 			<<txdb_it->second.start+txdb_it->second.exonstarts[1]<<"," | 
|  | 816 			<<txdb_it->second.start+txdb_it->second.exonstarts[1]+txdb_it->second.exonsize[1]<<"\t"//5 | 
|  | 817 			<<txdb_it->second.start+txdb_it->second.exonstarts[2]<<"," | 
|  | 818 			<<txdb_it->second.start+txdb_it->second.exonstarts[2]+txdb_it->second.exonsize[2]<<"\t"//6 | 
|  | 819 			<<txdb_it->second.strand<<"\t"//7 | 
|  | 820 			<<txdb_it->second.jnum12<<"\t"//8 | 
|  | 821 			<<txdb_it->second.jnum23<<"\t"//9 | 
|  | 822 			<<txdb_it->second.jnum13<<"\t"//10 | 
|  | 823 			<<(1.0*read_size*( txdb_it->second.enum1+txdb_it->second.jnum12 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[0] )<<"\t"//11 | 
|  | 824 			<<(1.0*read_size*( txdb_it->second.enum2+txdb_it->second.jnum12 +txdb_it->second.jnum23 )/txdb_it->second.exonsize[1] )<<"\t"//12 | 
|  | 825 			<<(1.0*read_size*( txdb_it->second.enum3+txdb_it->second.jnum23 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[2] )<<"\t"//13` | 
|  | 826 			<<(1.0*read_size*txdb_it->second.totalreadnum/txdb_it->second.len1)<<"\t"//14 | 
|  | 827 			<<txdb_it->second.exonsize[0]<<"\t"//15 | 
|  | 828 			<<txdb_it->second.exonsize[1]<<"\t"//16 | 
|  | 829 			<<txdb_it->second.exonsize[2]<<endl;//17 | 
|  | 830 | 
|  | 831 	      num_file<< eventid<<"\t" | 
|  | 832 			<<txdb_it->second.chrid<<"\t" | 
|  | 833 			<<txdb_it->second.start<<"," | 
|  | 834 			<<txdb_it->second.start+txdb_it->second.exonsize[0]<<"\t" | 
|  | 835 			<<txdb_it->second.start+txdb_it->second.exonstarts[1]<<"," | 
|  | 836 			<<txdb_it->second.start+txdb_it->second.exonstarts[1]+txdb_it->second.exonsize[1]<<"\t" | 
|  | 837 			<<txdb_it->second.start+txdb_it->second.exonstarts[2]<<"," | 
|  | 838 			<<txdb_it->second.start+txdb_it->second.exonstarts[2]+txdb_it->second.exonsize[2]<<"\t" | 
|  | 839 			<<txdb_it->second.strand<<"\t" | 
|  | 840                 <<txdb_it->second.enum1<<"\t" | 
|  | 841 			<<txdb_it->second.enum2<<"\t" | 
|  | 842 			<<txdb_it->second.enum3<<"\t" | 
|  | 843 			<<txdb_it->second.jnum12<<"\t" | 
|  | 844 			<<txdb_it->second.jnum23<<"\t" | 
|  | 845 			<<txdb_it->second.jnum13<<endl; | 
|  | 846       } | 
|  | 847       log_file.close(); | 
|  | 848       ratio_file.close(); | 
|  | 849       num_file.close(); | 
|  | 850       clock_finish = clock(); | 
|  | 851       cout<<"Done! time used:"<<(double)(clock_finish-clock_start)/CLOCKS_PER_SEC<<" seconds"<<endl; | 
|  | 852 | 
|  | 853 | 
|  | 854 } |