view PsiCLASS-1.0.2/FindJunction.cpp @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
line wrap: on
line source

/*
  Find junctions from the SAM file generated by Tophat2.
  The SAM file should be sorted.
  The junction is the start and the end coordinates of the spliced region in this program. The output of the junction is a bit different.
  
  Usage: ./a.out input [option] >output. 
  	./a.out -h for help.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#include "sam.h"

#define LINE_SIZE 4097
#define QUEUE_SIZE 10001
#define HASH_MAX 1000003

struct _readTree
{
	char id[256] ;
	int leftAnchor, rightAnchor ;
	int pos ;
	//bool secondary ;
	bool valid ;
	int editDistance ;
	int NH, cnt ; // if cnt < NH, then it has real secondary match for this splice junction 
	struct _readTree *left, *right ;

	int flag ;// The flag from sam head.
} ;

// The structure of a junction
struct _junction
{
	int start, end ; 
	int readCnt ; // The # of reads containing this junction
	int secReadCnt ; // The # of reads whose secondary alignment has this junction.
	char strand ; // On '+' or '-' strand
	int leftAnchor, rightAnchor ; // The longest left and right anchor
	int oppositeAnchor ; // The longest anchor of the shorter side.
	int uniqEditDistance, secEditDistance ;
	struct _readTree head ;
} ;

char line[LINE_SIZE] ;
char col[11][LINE_SIZE] ; // The option fields is not needed.
char strand ; // Extract XS field
char noncanonStrandInfo ;
//bool secondary ;
int NH ;
int editDistance ;
int mateStart ;
int filterYS ;
int samFlag ;

struct _junction junctionQueue[QUEUE_SIZE] ; // Expected only a few junctions in it for each read. This queue is sorted.

int qHead, qTail ;

bool flagPrintJunction ;
bool flagPrintAll ;
bool flagStrict ; 
int junctionCnt ;
bool anchorBoth ; 
bool validRead ;

int flank ;
struct _cigarSeg
{
	int len ;
	char type ;
} ;

struct _readTree *contradictedReads ;

char nucToNum[26] = { 0, 4, 1, 4, 4, 4, 2, 
	4, 4, 4, 4, 4, 4, 4,
		4, 4, 4, 4, 4, 3,
		4, 4, 4, 4, 4, 4 } ;

void PrintHelp()
{
	printf( 
		"Prints reads from the SAM/BAM file that containing junctions.\n" 
		"Usage: ./a.out input [option]>output\n"
		"Options:\n"
	    "\t-j xx [-B]: Output the junctions using 1-based coordinates. The format is \"reference id\" start end \"# of read\" strand.(They are sorted)\n and the xx is an integer means the maximum unqualified anchor length for a splice junction(default=8). If -B, the splice junction must be supported by a read whose both anchors are longer than xx.\n"
	    "\t-a: Output all the junctions, and use non-positive support number to indicate unqualified junctions.\n"
	    "\t-y: If the bits from YS field of bam matches the argument, we filter the alignment (default: 4).\n"
	      ) ;
}

void GetJunctionInfo( struct _junction &junc, struct _readTree *p )
{
	if ( p == NULL )
		return ;

	if ( p->valid )
	{
		//if ( junc.start == 22381343 + 1 && junc.end == 22904987 - 1 )
		//	printf( "%s %d %d %d\n", p->id, p->leftAnchor, p->rightAnchor, p->flag ) ;
		


		if ( p->cnt < p->NH )
		{
			junc.secEditDistance += p->editDistance ; 	
			++junc.secReadCnt ;
		}
		else
		{
			junc.uniqEditDistance += p->editDistance ;
			++junc.readCnt ;
		}
		int l = p->leftAnchor, r = p->rightAnchor ;

		if ( !anchorBoth )
		{
			if ( l > junc.leftAnchor )
				junc.leftAnchor = l ;
			if ( r > junc.rightAnchor )
				junc.rightAnchor = r ;

			if ( l <= r && l > junc.oppositeAnchor )
				junc.oppositeAnchor = l ;
			else if ( r < l && r > junc.oppositeAnchor )
				junc.oppositeAnchor = r ;
		}
		else
		{
			if ( l > flank && r > flank )
			{
				junc.leftAnchor = l ;
				junc.rightAnchor = r ;
			}

			if ( l <= r && l > junc.oppositeAnchor )
				junc.oppositeAnchor = l ;
			else if ( r < l && r > junc.oppositeAnchor )
				junc.oppositeAnchor = r ;
		}
	}
	GetJunctionInfo( junc, p->left ) ;
	GetJunctionInfo( junc, p->right ) ;
}

void PrintJunctionReads( struct _junction &junc, struct _readTree *p )
{
	if ( p == NULL )
		return ;

	if ( p->valid )
		printf( "%s\n", p->id ) ;
	PrintJunctionReads( junc, p->left ) ;
	PrintJunctionReads( junc, p->right ) ;
}

void PrintJunction( char *chrome, struct _junction &junc )
{
	int sum ;
	junc.leftAnchor = 0 ;
	junc.rightAnchor = 0 ;
	junc.oppositeAnchor = 0 ;
	junc.readCnt = 0 ;
	junc.secReadCnt = 0 ;
	junc.uniqEditDistance = 0 ;
	junc.secEditDistance = 0 ;
	GetJunctionInfo( junc, &junc.head ) ;

	sum = junc.readCnt + junc.secReadCnt ;

	if ( junc.leftAnchor <= flank || junc.rightAnchor <= flank || ( junc.readCnt + junc.secReadCnt <= 0 ) )
	{
		if ( flagPrintAll )
		{
			sum = -sum ;
		}
		else
			return ;
	}
	
	/*if ( junc.end - junc.start + 1 >= 200000 )
	{
		if ( junc.secReadCnt > 0 )
			junc.secReadCnt = 1 ;
	}*/

	/*if ( junc.oppositeAnchor <= ( ( flank / 2 < 1 ) ? flank / 2 : 1 ) && ( junc.readCnt + junc.secReadCnt ) <= 10 )
	{
		if ( junc.readCnt > 0 )
			junc.readCnt = 1 ;
		if ( junc.secReadCnt > 0 )
			junc.secReadCnt = 0 ;
	}*/

	printf( "%s %d %d %d %c %d %d %d %d\n", chrome, junc.start - 1, junc.end + 1, sum, junc.strand, 
		junc.readCnt, junc.secReadCnt, junc.uniqEditDistance, junc.secEditDistance ) ;
	//PrintJunctionReads( junc, &junc.head ) ;
}

void ClearReadTree( struct _readTree *p )
{
	if ( p == NULL )
		return ;
	ClearReadTree( p->left ) ;
	ClearReadTree( p->right ) ;
	free( p ) ;
}

// Insert to the read tree
bool InsertReadTree( struct _readTree *p, char *id, int l, int r )
{
	int tmp = strcmp( p->id, id ) ;
	if ( tmp == 0 )
	{
		p->cnt += 1 ;
		return true ;
	}
	else if ( tmp < 0 )
	{
		if ( p->left )
			return InsertReadTree( p->left, id, l, r ) ;
		else
		{
			p->left = (struct _readTree *)malloc( sizeof( struct _readTree ) ) ;	
			strcpy( p->left->id, id ) ;
			p->left->leftAnchor = l ;
			p->left->rightAnchor = r ;
			//p->left->secondary = secondary ;
			p->left->editDistance = editDistance ;
			p->left->left = p->left->right = NULL ;
			p->left->valid = validRead ;
			p->left->cnt = 1 ;
			p->left->NH = NH ;
			p->left->flag = samFlag ;
			return false ;
		}
	}
	else
	{
		if ( p->right )
			return InsertReadTree( p->right, id, l, r ) ;
		else
		{
			p->right = (struct _readTree *)malloc( sizeof( struct _readTree ) ) ;	
			strcpy( p->right->id, id ) ;
			p->right->leftAnchor = l ;
			p->right->rightAnchor = r ;
			//p->right->secondary = secondary ;
			p->right->editDistance = editDistance ;
			p->right->left = p->right->right = NULL ;
			p->right->valid = validRead ;
			p->right->cnt = 1 ;
			p->right->NH = NH ;
			p->right->flag = samFlag ;
			return false ;
		}
	}
}


bool SearchContradictedReads( struct _readTree *p, char *id, int pos )
{
	if ( p == NULL )
		return false ;
	int tmp = strcmp( p->id, id ) ;
	if ( tmp == 0 && p->pos == pos )
		return true ;
	else if ( tmp <= 0 )
		return SearchContradictedReads( p->left, id, pos ) ;
	else
		return SearchContradictedReads( p->right, id, pos ) ;
}

bool InsertContradictedReads( struct _readTree *p, char *id, int pos )
{
	if ( p == NULL )
	{
		contradictedReads = (struct _readTree *)malloc( sizeof( struct _readTree ) ) ;	
		strcpy( contradictedReads->id, id ) ;
		contradictedReads->pos = pos ;
		contradictedReads->left = contradictedReads->right = NULL ;
		return false ;
	}
	int tmp = strcmp( p->id, id ) ;
	if ( tmp == 0 && p->pos == pos )
		return true ;
	else if ( tmp <= 0 )
	{
		if ( p->left )
			return InsertContradictedReads( p->left, id, pos ) ;
		else
		{
			p->left = (struct _readTree *)malloc( sizeof( struct _readTree ) ) ;	
			strcpy( p->left->id, id ) ;
			p->left->pos = pos ;
			p->left->left = p->left->right = NULL ;
			return false ;
		}
	}
	else
	{
		if ( p->right )
			return InsertContradictedReads( p->right, id, pos ) ;
		else
		{
			p->right = (struct _readTree *)malloc( sizeof( struct _readTree ) ) ;	
			strcpy( p->right->id, id ) ;
			p->right->pos = pos ;
			p->right->left = p->right->right = NULL ;
			return false ;
		}
	}
}

struct _readTree *GetReadTreeNode( struct _readTree *p, char *id )
{
	if ( p == NULL )
		return NULL ;
	int tmp = strcmp( p->id, id ) ;
	if ( tmp == 0 )
		return p ;
	else if ( tmp < 0 )
		return GetReadTreeNode( p->left, id ) ;
	else
		return GetReadTreeNode( p->right, id ) ;
}

// Insert the new junction into the queue, 
// and make sure the queue is sorted.
// Assume each junction range is only on one strand. 
// l, r is the left and right anchor from a read
void InsertQueue( int start, int end, int l, int r )
{
	int i, j ;
	i = qTail ;
	

	while ( i != qHead )
	{
		j = i - 1 ;
		if ( j < 0 )
			j = QUEUE_SIZE - 1 ;
		
		if ( junctionQueue[j].start < start 
			|| ( junctionQueue[j].start == start && junctionQueue[j].end < end ) )
			break ;
		
		junctionQueue[i] = junctionQueue[j] ;
		--i ;
		
		if ( i < 0 )
			i = QUEUE_SIZE - 1 ;
	}
	
	junctionQueue[i].start = start ;
	junctionQueue[i].end = end ;
	/*if ( !secondary )
	{
		junctionQueue[i].readCnt = 1 ;
		junctionQueue[i].secReadCnt = 0 ;
	}
	else
	{
		junctionQueue[i].readCnt = 0 ;
		junctionQueue[i].secReadCnt = 1 ;
	}*/
	junctionQueue[i].strand = strand ;
	junctionQueue[i].leftAnchor = l ;
	junctionQueue[i].rightAnchor = r ;
	
	strcpy( junctionQueue[i].head.id, col[0] ) ;
	junctionQueue[i].head.valid = validRead ;
	junctionQueue[i].head.leftAnchor = l ;
	junctionQueue[i].head.rightAnchor = r ;
	junctionQueue[i].head.left = junctionQueue[i].head.right = NULL ;
	//junctionQueue[i].head.secondary = secondary ;
	junctionQueue[i].head.NH = NH ;
	junctionQueue[i].head.cnt = 1 ;
	junctionQueue[i].head.editDistance = editDistance ;

	++qTail ;
	if ( qTail >= QUEUE_SIZE )
		qTail = 0 ;
}

// Search the queue, and remove the heads if its end(start) is smaller than prune.(Because it is impossible to have that junction again) 
// Add the junction to the queue, if it is not in it. 
// Return true, if it finds the junction. Otherwise, return false.
bool SearchQueue( int start, int end, int prune, int l, int r )
{
	int i ;
	
	// Test whether this read might be a false alignment.
	i = qHead ;
	while ( i != qTail )
	{
		struct _readTree *rt ;
		if ( ( junctionQueue[i].start == start && junctionQueue[i].end < end ) ||
			( junctionQueue[i].start > start && junctionQueue[i].end == end ) )
		{
			// This alignment is false ;
			rt = GetReadTreeNode( &junctionQueue[i].head, col[0] ) ;
			// the commented out logic because it is handled by contradicted reads
			if ( rt != NULL ) //&& ( rt->flag & 0x40 ) != ( samFlag & 0x40 ) )
			{
				if ( rt->leftAnchor <= flank || rt->rightAnchor <= flank )//|| rt->secondary )
				{
					if ( l > flank && r > flank )//&& !secondary ) 
						rt->valid = false ;
					else
						validRead = false ;
				}
				else
				{
					// Ignore this read
					//return true ;
					validRead = false ;
				}
			}
		}
		else if ( ( junctionQueue[i].start == start && junctionQueue[i].end > end ) ||
			( junctionQueue[i].start < start && junctionQueue[i].end == end ) )
		{
			// This other alignment is false ;
			rt = GetReadTreeNode( &junctionQueue[i].head, col[0] ) ;
			//if ( rt != NULL )
			if ( rt != NULL ) //&& ( rt->flag & 0x40 ) != ( samFlag & 0x40 ) )
			{
				if ( l <= flank || r <= flank )//|| secondary )
				{
					if ( rt->leftAnchor > flank && rt->rightAnchor > flank )//&& !rt->secondary )
						validRead = false ;
					else
						rt->valid = false ;
				}
				else	
					rt->valid = false ;
			}
		}
		++i ;
		if ( i >= QUEUE_SIZE )
			i = 0 ;
	}
	
	//if ( start == 8077108 && end == 8078439 )
	//	exit( 1 ) ;
	i = qHead ;

	while ( i != qTail )
	{
		if ( junctionQueue[i].start == start && 
			junctionQueue[i].end == end )
		{
			if (junctionQueue[i].strand == '?' && junctionQueue[i].strand != strand )
			{
				junctionQueue[i].strand = strand ;
			}
			InsertReadTree( &junctionQueue[i].head, col[0], l, r ) ; 
			return true ;
		}
		
		if ( junctionQueue[i].end < prune && i == qHead )
		{
			// pop
			PrintJunction( col[2], junctionQueue[i] ) ;
			ClearReadTree( junctionQueue[i].head.left ) ;
			ClearReadTree( junctionQueue[i].head.right ) ;
			++qHead ;
			if ( qHead >= QUEUE_SIZE )
				qHead = 0 ;
		}
		++i ;
		if ( i >= QUEUE_SIZE )
			i = 0 ;
	}
	
	InsertQueue( start, end, l, r ) ;
	return false ;
}

// Compute the junctions based on the CIGAR  
bool CompareJunctions( int startLocation, char *cigar )
{
	int currentLocation = startLocation ; // Current location on the reference genome
	int i, j ;
	int num ;
	int newJuncCnt = 0 ; // The # of junctions in the read, and the # of new junctions among them.
	
	struct _cigarSeg cigarSeg[1000] ; // A segment of the cigar.
	int ccnt = 0 ; // cigarSeg cnt

	j = 0 ;
	num = 0 ;
	validRead = true ;

	for ( i = 0 ; cigar[i] ; ++i )
	{
		if ( cigar[i] >= '0' && cigar[i] <= '9' )
		{
			num = num * 10 + cigar[i] - '0' ;
		}
		else
		{
			cigarSeg[ccnt].len = num ;
			cigarSeg[ccnt].type = cigar[i] ;
			++ccnt ;
			num = 0 ;
		}
	}

	// Filter low complex alignment.
	// Only applies this to alignments does not have a strand information.
	if ( strand == '?' )
	{
		if ( noncanonStrandInfo != -1 )
		{
			if ( ( noncanonStrandInfo & filterYS ) != 0 )
				validRead = false ;		
		}
		else
		{
			int softStart = -1 ;
			int softEnd = 0 ;
			if ( cigarSeg[0].type == 'S' )
				softStart = cigarSeg[0].len ;
			if ( cigarSeg[ ccnt - 1 ].type == 'S' )
				softEnd = cigarSeg[ ccnt - 1 ].len ;
			int readLen = strlen( col[9] ) ;
			int count[5] = { 0, 0, 0, 0, 0 } ;

			int pos = 0 ;
			for ( i = 0 ; i < ccnt ; ++i )
			{
				switch ( cigarSeg[i].type )
				{
					case 'S':
						pos += cigarSeg[i].len ;
					case 'M':
					case 'I':
						{
							for ( j = 0 ; j < cigarSeg[i].len ; ++j )
								++count[ nucToNum[  col[9][pos + j] - 'A' ] ] ;
							pos += j ;
						} break ;
					case 'N':
						{
							int max = 0 ;
							int sum = 0 ;
							for ( j = 0 ; j < 5 ; ++j )
							{
								if ( count[j] > max )
									max = count[j] ;
								sum += count[j] ;
							}
							if ( max > 0.8 * sum )
								validRead = false ;
							count[0] = count[1] = count[2] = count[3] = count[4] = 0 ;
						} break ;
					case 'H':
					case 'P':
					case 'D':
					default: break ;
				}
			}

			int max = 0 ;
			int sum = 0 ;
			for ( j = 0 ; j < 5 ; ++j )
			{
				if ( count[j] > max )
					max = count[j] ;
				sum += count[j] ;
			}
			if ( max > 0.8 * sum )
				validRead = false ;
			/*	count[0] = count[1] = count[2] = count[3] = count[4] = 0 ;


			for ( i = softStart + 1 ; i < readLen - softEnd ; ++i )
			  {
			  switch ( col[9][i] )
			  {
			  case 'A': ++count[0] ; break ;
			  case 'C': ++count[1] ; break ;
			  case 'G': ++count[2] ; break ;
			  case 'T': ++count[3] ; break ;
			  default: ++count[4] ; 
			  }
			  }
			  int max = 0 ;
			  for ( j = 0 ; j < 5 ; ++j )
			  if ( count[j] > max )
			  max = count[j] ;
			  if ( max > 0.6 * ( readLen - softEnd - softStart - 1 ) )
			  validRead = false ;*/
		}
	}

	// Test whether contradict with mate pair
	if ( col[6][0] == '=' )
	{
		currentLocation = startLocation ;
		for ( i = 0 ; i < ccnt ; ++i )
		{
			if ( cigarSeg[i].type == 'I' || cigarSeg[i].type == 'S' || cigarSeg[i].type == 'H'
				|| cigarSeg[i].type == 'P' )
				continue ;
			else if ( cigarSeg[i].type == 'N' )
			{
				if ( mateStart >= currentLocation && mateStart <= currentLocation + cigarSeg[i].len - 1 )
				{
				/*if ( cigarSeg[i].len == 91486 )
				{
					printf( "%s %d %d\n", currentLocation, mateStart ) ;
					exit( 1 ) ;
				}*/
					// ignore this read
					//return false ;
					InsertContradictedReads( contradictedReads, col[0], mateStart ) ;
					validRead = false ;
					break ;
				}
			}
			currentLocation += cigarSeg[i].len ;
		}

		i = qHead ;
		// Search if it falls in the splices junction created by its mate.
		while ( i != qTail )
		{
			if ( mateStart < junctionQueue[i].start && junctionQueue[i].start <= startLocation 
				&& startLocation <= junctionQueue[i].end 
				&& SearchContradictedReads( contradictedReads, col[0], startLocation ) )
			{
				validRead = false ;
				break ;
			}
			++i ;
			if ( i >= QUEUE_SIZE )
				i = 0 ;
		}
	}

	currentLocation = startLocation ;
	for ( i = 0 ; i < ccnt ; ++i )
	{
		if ( cigarSeg[i].type == 'I' || cigarSeg[i].type == 'S' || cigarSeg[i].type == 'H'
				|| cigarSeg[i].type == 'P' )
			continue ;
		else if ( cigarSeg[i].type == 'N' )
		{
			int left, right ;
			int tmp ;
			tmp = i ;
			while ( i > 0 && ( cigarSeg[i - 1].type == 'I' || cigarSeg[i - 1].type == 'D' ) )
				--i ;
			if ( i > 0 && cigarSeg[i - 1].type == 'M' )
			{
				left = cigarSeg[i - 1].len ;
				if ( i >= 2 && cigarSeg[i - 2].type == 'N' && left <= flank )
				{
					left = flank + 1 ;
				}
			}
			else
				left = 0 ;

			i = tmp ;
			while ( i < ccnt && ( cigarSeg[i + 1].type == 'I' || cigarSeg[i + 1].type == 'D' ) )
				++i ;
			if ( i < ccnt && cigarSeg[i + 1].type == 'M' )
			{
				right = cigarSeg[i + 1].len ;
				if ( i + 2 < ccnt && cigarSeg[i + 2].type == 'N' && right <= flank )
				{
					right = flank + 1 ;
				}
			}
			else
				right = 0 ;
			i = tmp ;
			if ( !SearchQueue( currentLocation, currentLocation + cigarSeg[i].len - 1, startLocation, left, right ) )
			{
				++newJuncCnt ;
			}
		}
		currentLocation += cigarSeg[i].len ;
	}
		/*else if ( cigar[i] == 'I' )
		{
			num = 0 ;
		}
		else if ( cigar[i] == 'N' )
		{
			if ( !SearchQueue( currentLocation, currentLocation + num - 1, startLocation ) )
			{
				++newJuncCnt ;
				//if ( flagPrintJunction )
				//	printf( "%s %d %d\n", col[2], currentLocation - 2, currentLocation + len - 1 ) ;
			}
			currentLocation += num ;
			num = 0 ;
		}
		else // Other operations, like M, D,...,
		{
			currentLocation += num ;
			num = 0 ;
		}*/
	
	junctionCnt += newJuncCnt ;
	
	if ( newJuncCnt )
		return true ;
	else
		return false ;
}

void cigar2string( bam1_core_t *c, uint32_t *in_cigar, char *out_cigar )
{
	int k, op, l ;
	char opcode ;

	*out_cigar = '\0' ;
	for ( k = 0 ; k < c->n_cigar ; ++k )
	{
		op = in_cigar[k] & BAM_CIGAR_MASK ;
		l = in_cigar[k] >> BAM_CIGAR_SHIFT ;
		switch (op)
		{
			case BAM_CMATCH: opcode = 'M' ; break ;
			case BAM_CINS: opcode = 'I' ; break ;
			case BAM_CDEL: opcode = 'D' ; break ;
			case BAM_CREF_SKIP: opcode = 'N' ; break ;
			case BAM_CSOFT_CLIP: opcode = 'S' ; break ;
			case BAM_CHARD_CLIP: opcode = 'H' ; break ;
			case BAM_CPAD: opcode = 'P' ; break ;
		}
		sprintf( out_cigar + strlen( out_cigar ), "%d%c", l, opcode ) ;
	}
}



int main( int argc, char *argv[] ) 
{
	FILE *fp ;
 	samfile_t *fpsam ;
	bam1_t *b = NULL ;
	bool useSam = true ;

 	int i, len ;
 	int startLocation ; 
	bool flagRemove = false ;
	char prevChrome[103] ;

	anchorBoth = false ;	
	
	flagPrintJunction = false ;
	flagPrintAll = false ;
	flagStrict = false ;
	junctionCnt = 0 ;
	bool hasMateReadIdSuffix = false ;

	strcpy( prevChrome, "" ) ;
	flagRemove = true ;
	flagPrintJunction = true ;
	flank = 8 ;
	filterYS = 4 ;

	contradictedReads = NULL ;
	
	// processing the argument list
	for ( i = 1 ; i < argc ; ++i )
	{
		if ( !strcmp( argv[i], "-h" ) )
		{
			PrintHelp() ;
			return 0 ;
		}
		else if ( !strcmp( argv[i], "-r" ) )
		{
			flagRemove = true ;
		}
		else if ( !strcmp( argv[i], "-j" ) )
		{
			strcpy( prevChrome, "" ) ;
			flagRemove = true ;
			flagPrintJunction = true ;
			flank = atoi( argv[i + 1] ) ;
			if ( i + 2 < argc && !strcmp( argv[i+2], "-B" ) )
			{
				anchorBoth = true ;
				++i ;		
			}
			++i ;
		}
		else if ( !strcmp( argv[i], "-a" ) )
		{
			flagPrintAll = true ;
		}
		else if ( !strcmp( argv[i], "--strict" ) )
		{
			flagStrict = true ;
		}
		else if ( !strcmp( argv[i], "-y" ) )
		{
			filterYS = atoi( argv[i + 1] ) ;
			++i ;
		}
		else if ( !strcmp( argv[i], "--hasMateIdSuffix" ) )
		{
			hasMateReadIdSuffix = true ;
			++i ;
		}
		else if ( i > 1 )
		{
			printf( "Unknown option %s\n", argv[i] ) ;
			exit( 1 ) ;
		}
	}
	if ( argc == 1 )
	{
		PrintHelp() ;
		return 0 ;
	}

	len = strlen( argv[1] ) ;
	if ( argv[1][len-3] == 'b' || argv[1][len-3] == 'B' )
	{	
		if ( !( fpsam = samopen( argv[1], "rb", 0 ) ) )
			return 0 ;

		if ( !fpsam->header )
		{
			//samclose( fpsam ) ;
			//fpsam = samopen( argv[1], "r", 0 ) ;
			//if ( !fpsam->header )
			//{
			useSam = false ;
			fp = fopen( argv[1], "r" ) ;
			//}
		}
	}
	else
	{
		useSam = false ;
		fp = NULL ;
		if ( !strcmp( argv[1], "-" ) )
			fp = stdin ;
		else
			fp = fopen( argv[1], "r" ) ;
		if ( fp == NULL )
		{
			printf( "Could not open file %s\n", argv[1] ) ;
			return 0 ;
		}
	}

	while ( 1 )
	{
		int flag = 0 ;
		if ( useSam )
		{
			if ( b )
				bam_destroy1( b ) ;
			b = bam_init1() ;
			if ( samread( fpsam, b ) <= 0 )
				break ;
			if ( b->core.tid >= 0 )
				strcpy( col[2], fpsam->header->target_name[b->core.tid] ) ;
			else
				strcpy( col[2], "-1" ) ;
			cigar2string( &(b->core), bam1_cigar( b ), col[5] ) ;
			strcpy( col[0], bam1_qname( b ) ) ;	
			flag = b->core.flag ;	
			if ( bam_aux_get( b, "NH" ) )
			{	
				/*if ( bam_aux2i( bam_aux_get(b, "NH" ) ) >= 2 )
				{
					secondary = true ;
				}
				else
					secondary = false ;*/

				NH = bam_aux2i( bam_aux_get( b, "NH" ) ) ;
			}
			else
			{
				//secondary = false ;
				NH = 1 ;
			}

			if ( bam_aux_get( b, "NM" ) )
			{
				editDistance = bam_aux2i( bam_aux_get( b, "NM" ) ) ;
			}
			else if ( bam_aux_get( b, "nM" ) )
			{
				editDistance = bam_aux2i( bam_aux_get( b, "nM" ) ) ;
			}
			else
				editDistance = 0 ;

			mateStart = b->core.mpos + 1 ;
			if ( b->core.mtid == b->core.tid )
				col[6][0] = '=' ;
			else
				col[6][0] = '*' ;		

			if ( b->core.l_qseq < 20 )
				continue ;
			
			for ( i = 0 ; i < b->core.l_qseq ; ++i )
			{
				int bit = bam1_seqi( bam1_seq( b ), i ) ;
				switch ( bit )
				{
					case 1: col[9][i] = 'A' ; break ;
					case 2: col[9][i] = 'C' ; break ;
					case 4: col[9][i] = 'G' ; break ;
					case 8: col[9][i] = 'T' ; break ;
					case 15: col[9][i] = 'N' ; break ;
					default: col[9][i] = 'A' ; break ;
				}	
			}
			col[9][i] = '\0' ;

			/*if ( flag & 0x100 )
				secondary = true ;
			else 
				secondary = false ;*/
		}
		else
		{
			char *p ;
			if ( !fgets( line, LINE_SIZE, fp ) )
				break ;
			if ( line[0] == '\0' || line[0] == '@' )
				continue ;
			sscanf( line, "%s%s%s%s%s%s%s%s%s%s%s", col, col + 1, col + 2, col + 3, col + 4, 
					col + 5, col + 6, col + 7, col + 8, col + 9,  col + 10 ) ;
					
			flag = atoi( col[1] ) ;
			if ( p = strstr( line, "NH" ) )
			{
				int k = 0 ;
				p += 5 ;
				while ( *p != ' ' && *p != '\t' && *p != '\0')
				{
					k = k * 10 + *p - '0' ;
					++p ;
				}
				/*if ( k >= 2 )
				{
					secondary = true ;
				}
				else
					secondary = false ;*/
				NH = k ;
			}
			else
			{
				//secondary = false ;
				NH = 1 ;
			}
			if ( ( p = strstr( line, "NM" ) ) || ( p = strstr( line, "nM" ) ) )
			{
				int k = 0 ;
				p += 5 ;
				while ( *p != ' ' && *p != '\t' && *p != '\0')
				{
					k = k * 10 + *p - '0' ;
					++p ;
				}
				editDistance = k ;
			}
			else
				editDistance = 0 ;

			mateStart = atoi( col[7] ) ;
			/*if ( flag & 0x100 )
				secondary = true ;
			else 
				secondary = false ;*/
			
		}
		samFlag = flag ;
		for ( i = 0 ; col[5][i] ; ++i )
			if ( col[5][i] == 'N' )
				break ;

		if ( !col[5][i] )
			continue ;
		
		// remove .1, .2 or /1, /2 suffix
		if ( hasMateReadIdSuffix )
		{
			char *s = col[0] ;
			int len = strlen( s ) ;
			if ( len >= 2 && ( s[len - 1] == '1' || s[len - 1] == '2' ) 
				&& ( s[len - 2] == '.' || s[len - 2] == '/' ) )
			{
				s[len - 2] = '\0' ;
			}
		}

		if ( useSam )
		{
			if ( bam_aux_get( b, "XS" ) )
			{
				strand = bam_aux2A( bam_aux_get( b, "XS" ) ) ;
				if ( bam_aux_get( b, "YS" ) )
				{
					noncanonStrandInfo = bam_aux2i( bam_aux_get( b, "YS" ) ) ;
				}
				else
				{
					noncanonStrandInfo = -1 ;
				}
			}
			else
			{
				strand = '?' ;
				noncanonStrandInfo = -1 ;
			}
			startLocation = b->core.pos + 1 ;
		}
		else
		{
			if ( strstr( line, "XS:A:-" ) ) // on negative strand
				strand = '-' ;
			else if ( strstr( line, "XS:A:+" ) )
				strand = '+' ;
			else
			{
				strand = '?' ;
				char *p = strstr( line, "YS:i:" ) ;
				if ( p != NULL )
				{
					p += 5 ;
					noncanonStrandInfo = atoi( p ) ;
				}
				else
					noncanonStrandInfo = -1 ;
			}
			startLocation = atoi( col[3] ) ;  
		}
		
		// Found the junctions from the read.
		if ( strcmp( prevChrome, col[2] ) )
		{
			if ( flagPrintJunction )
			{
				// Print the remaining elements in the queue
				i = qHead ;
				while ( i != qTail )
				{
					PrintJunction( prevChrome, junctionQueue[i] ) ;
					++i ;
					if ( i >= QUEUE_SIZE )
						i = 0 ;
				}
			}

			// new chromosome
			ClearReadTree( contradictedReads ) ;
			contradictedReads = NULL ;
			qHead = qTail = 0 ;
			strcpy( prevChrome, col[2] ) ;
		}

		if ( flagRemove )
		{
			if ( CompareJunctions( startLocation, col[5] ) )
			{
				// Test whether this read has new junctions
				//++junctionCnt ;
				if ( !flagPrintJunction )
					printf( "%s", line ) ;
			}
		}
		else
		{
			++junctionCnt ;
			printf( "%s", line ) ;
		}
		//printf( "hi2 %s\n", col[0] ) ;
	}
	
	if ( flagPrintJunction )
	{
		// Print the remaining elements in the queue
		i = qHead ;
		while ( i != qTail )
		{
			PrintJunction( prevChrome, junctionQueue[i] ) ;
			++i ;
			if ( i >= QUEUE_SIZE )
				i = 0 ;
		}
	}	
	
	//fprintf( stderr, "The number of junctions: %d\n", junctionCnt ) ;
	return 0 ;
}