Mercurial > repos > lsong10 > psiclass
view PsiCLASS-1.0.2/Constraints.hpp @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
line wrap: on
line source
#ifndef _MOURISL_CLASSES_CONSTRAINTS_HEADER #define _MOURISL_CLASSES_CONSTRAINTS_HEADER #include <vector> #include <map> #include <algorithm> #include <string> #include <string.h> #include "BitTable.hpp" #include "alignments.hpp" #include "SubexonGraph.hpp" struct _constraint { BitTable vector ; // subexon vector double weight ; double normAbund ; double abundance ; int support ; int uniqSupport ; int maxReadLen ; // the longest read length support this constraint. int info ; // other usages. int first, last ; // indicate the first and last index of the subexons. } ; struct _matePairConstraint { int i, j ; int support ; int uniqSupport ; double abundance ; double normAbund ; int effectiveCount ; int type ; } ; struct _readIdHeap { char *readId ; int pos ; int matePos ; int idx ; } ; //---------------------------------------------------------------------------------- // We assume the access to the data structure is sorted by matePos. // So we can "pre-cache" the ids with the same matePos. class MateReadIds { private: std::vector< struct _readIdHeap > heap ; void HeapifyUp( int tag ) { while ( tag > 1 ) { if ( heap[tag / 2].matePos < heap[tag].matePos ) return ; struct _readIdHeap tmp ; tmp = heap[tag / 2] ; heap[tag / 2] = heap[tag] ; heap[tag] = tmp ; tag /= 2 ; } } void HeapifyDown( int tag ) { int size = heap.size() ; while ( 2 * tag < size ) { int choose = 2 * tag ; if ( 2 * tag + 1 < size && heap[ 2 * tag + 1].matePos < heap[2 * tag ].matePos ) { choose = 2 * tag + 1 ; } if ( heap[tag].matePos < heap[choose].matePos ) return ; struct _readIdHeap tmp ; tmp = heap[choose] ; heap[choose] = heap[tag] ; heap[tag] = tmp ; tag = choose ; } } struct _readIdHeap Pop() { struct _readIdHeap ret ; int size = heap.size() ; if ( size < 2 ) { ret.readId = NULL ; return ret ; } ret = heap[1] ; heap[1] = heap[ heap.size() - 1] ; heap.pop_back() ; HeapifyDown( 1 ) ; return ret ; } int cachedMatePos ; std::map<std::string, int> cachedIdx ; bool hasMateReadIdSuffix ; // ignore the last ".{1,2}" or "/{1,2}" . public: MateReadIds() { // Push a dummy element so the vector becomes 1-based. struct _readIdHeap nh ; nh.readId = NULL ; nh.pos = nh.idx = nh.matePos = -1 ; heap.push_back( nh ) ; cachedMatePos = -1 ; hasMateReadIdSuffix = false ; } ~MateReadIds() { int size = heap.size() ; std::map<std::string, int>().swap( cachedIdx ) ; for ( int i = 0 ; i < size ; ++i ) if ( heap[i].readId != NULL ) free( heap[i].readId ) ; } void Clear() { std::map<std::string, int>().swap( cachedIdx ) ; int size = heap.size() ; for ( int i = 0 ; i < size ; ++i ) if ( heap[i].readId != NULL ) free( heap[i].readId ) ; std::vector<struct _readIdHeap>().swap( heap ) ; struct _readIdHeap nh ; nh.readId = NULL ; nh.pos = nh.idx = nh.matePos = -1 ; heap.push_back( nh ) ; cachedMatePos = -1 ; } void Insert( char *id, int pos, int idx, int matePos ) { struct _readIdHeap nh ; nh.readId = strdup( id ) ; nh.pos = pos ; nh.idx = idx ; nh.matePos = matePos ; heap.push_back( nh ) ; HeapifyUp( heap.size() - 1 ) ; } // If the id does not exist, return -1. int Query( char *id, int matePos ) { int size ; size = heap.size() ; if ( matePos > cachedMatePos ) { std::map<std::string, int>().swap( cachedIdx ) ; while ( size >= 2 && heap[1].matePos < matePos ) { struct _readIdHeap r = Pop() ; if ( r.readId ) { free( r.readId ) ; } --size ; } while ( size >= 2 && heap[1].matePos == matePos ) { struct _readIdHeap r = Pop() ; cachedIdx[ std::string( r.readId ) ] = r.idx ; if ( r.readId ) free( r.readId ) ; --size ; } cachedMatePos = matePos ; } std::string s( id ) ; if ( hasMateReadIdSuffix ) { int len = s.length() ; if ( len >= 2 && ( s[len - 1] == '1' || s[len - 1] == '2' ) && ( s[len - 2] == '.' || s[len - 2] == '/' ) ) { s[len - 1] = '2' - s[len - 1] + '1' ; } } if ( cachedIdx.find( s ) != cachedIdx.end() ) { return cachedIdx[s] ; } return -1 ; } void UpdateIdx( std::vector<int> &newIdx ) { int size = heap.size() ; int i ; for ( i = 1 ; i < size ; ++i ) { heap[i].idx = newIdx[ heap[i].idx ] ; } for ( std::map<std::string, int>::iterator it = cachedIdx.begin() ; it != cachedIdx.end() ; ++it ) it->second = newIdx[ it->second ] ; } void SetHasMateReadIdSuffix( bool in ) { hasMateReadIdSuffix = true ; } } ; //-------------------------------------------------------------------------- class Constraints { private: int prevStart, prevEnd ; bool usePrimaryAsUnique ; MateReadIds mateReadIds ; Alignments *pAlignments ; //@return: whether this alignment is compatible with the subexons or not. bool ConvertAlignmentToBitTable( struct _pair *segments, int segCnt, struct _subexon *subexons, int seCnt, int seStart, struct _constraint &ct ) ; // Sort to increasing order. Since the first subexon occupies the least important digit. static bool CompSortConstraints( const struct _constraint &a, const struct _constraint &b ) { //int k if ( a.first < b.first ) return true ; else if ( a.first > b.first ) return false ; int diffPos = a.vector.GetFirstDifference( b.vector ) ; if ( diffPos == -1 ) // case of equal. return false ; if ( a.vector.Test( diffPos )) return false ; else return true ; } static bool CompSortMatePairs( const struct _matePairConstraint &a, const struct _matePairConstraint &b ) { if ( a.i < b.i ) return true ; else if ( a.i > b.i ) return false ; else { if ( a.j < b.j ) return true ; else return false ; } } void CoalesceSameConstraints() ; void ComputeNormAbund( struct _subexon *subexons ) ; public: std::vector<struct _constraint> constraints ; std::vector<struct _matePairConstraint> matePairs ; Constraints() { usePrimaryAsUnique = false ; } Constraints( Alignments *a ): pAlignments( a ) { } ~Constraints() { int i ; int size = constraints.size() ; for ( i = 0 ; i < size ; ++i ) constraints[i].vector.Release() ; constraints.clear() ; std::vector<struct _constraint>().swap( constraints ) ; matePairs.clear() ; std::vector<struct _matePairConstraint>().swap( matePairs ) ; } void Clear() { //TODO: do I need to release the memory from BitTable? constraints.clear() ; } void SetAlignments( Alignments *a ) { pAlignments = a ; } void SetUsePrimaryAsUnique( bool in ) { usePrimaryAsUnique = in ; } void Assign( Constraints &c ) { int i ; int size = constraints.size() ; if ( size > 0 ) { for ( i = 0 ; i < size ; ++i ) constraints[i].vector.Release() ; constraints.clear() ; std::vector<struct _constraint>().swap( constraints ) ; } matePairs.clear() ; std::vector<struct _matePairConstraint>().swap( matePairs ) ; //constraints.resize( c.constraints.size() ) ; constraints = c.constraints ; size = c.constraints.size() ; for ( i = 0 ; i < size ; ++i ) { /*struct _constraint nc ; nc.weight = c.constraints[i].weight ; nc.normAbund = c.constraints[i].normAbund ; nc.abundance = c.constraints[i].abundance ; nc.support = c.constraints[i].support ; nc.uniqSupport = c.constraints[i].uniqSupport ; nc.maxReadLen = c.constraints[i].maxReadLen ; nc.info = c.constraints[i].info ; nc.first = c.constraints[i].first ; nc.last = c.constraints[i].last ; nc.vector.Duplicate( c.constraints[i].vector ) ; constraints[i] = ( nc ) ; */ constraints[i].vector.Nullify() ; // so that it won't affect the BitTable in "c" constraints[i].vector.Duplicate( c.constraints[i].vector ) ; } matePairs = c.matePairs ; pAlignments = c.pAlignments ; } void DownsampleConstraintsFrom( Constraints &c, int stride = 10 ) { int i ; int size = constraints.size(), k ; if ( size > 0 ) { for ( i = 0 ; i < size ; ++i ) constraints[i].vector.Release() ; constraints.clear() ; std::vector<struct _constraint>().swap( constraints ) ; } matePairs.clear() ; std::vector<struct _matePairConstraint>().swap( matePairs ) ; //constraints.resize( c.constraints.size() ) ; //constraints = c.constraints ; k = 0 ; size = c.constraints.size() ; for ( i = 0 ; i < size ; i += stride, ++k ) { constraints.push_back( c.constraints[i] ) ; constraints[k].vector.Nullify() ; // so that it won't affect the BitTable in "c" constraints[k].vector.Duplicate( c.constraints[i].vector ) ; /*std::vector<int> seIdx ; constraints[k].vector.GetOnesIndices( seIdx ) ; int j, l = seIdx.size() ; for ( j = 2 ; j < l ; ++j ) { constraints[k].vector.Unset( seIdx[j] ) ; } constraints[k].last = seIdx[1] ;*/ } // mate pairs is not used. if we down-sampling pAlignments = c.pAlignments ; } void TruncateConstraintsCoverFrom( Constraints &c, int seCnt, int maxConstraintSize ) { int i ; int size = constraints.size() ; if ( size > 0 ) { for ( i = 0 ; i < size ; ++i ) constraints[i].vector.Release() ; constraints.clear() ; std::vector<struct _constraint>().swap( constraints ) ; } matePairs.clear() ; std::vector<struct _matePairConstraint>().swap( matePairs ) ; //constraints.resize( c.constraints.size() ) ; //constraints = c.constraints ; size = c.constraints.size() ; for ( i = 0 ; i < size ; ++i ) { constraints.push_back( c.constraints[i] ) ; constraints[i].vector.Nullify() ; // so that it won't affect the BitTable in "c" constraints[i].vector.Init( seCnt ) ; std::vector<int> seIdx ; c.constraints[i].vector.GetOnesIndices( seIdx ) ; int j, l = seIdx.size() ; for ( j = 0 ; j < maxConstraintSize && j < l ; ++j ) { constraints[i].vector.Set( seIdx[j] ) ; } constraints[i].last = seIdx[j - 1] ; } // mate pairs is not used. if we down-sampling pAlignments = c.pAlignments ; } void SetHasMateReadIdSuffix( bool in ) { mateReadIds.SetHasMateReadIdSuffix( in ) ; } int BuildConstraints( struct _subexon *subexons, int seCnt, int start, int end ) ; } ; #endif