Mercurial > repos > yufei-luo > s_mart
view commons/pyRepetUnit/align/transformAACoordIntoNtCoord/TransformAACoordIntoNtCoordInAlignFormat.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line source
import os import sys from commons.pyRepetUnit.align.AlignListUtils import AlignListUtils from commons.core.seq.BioseqUtils import BioseqUtils from commons.core.checker.RepetException import RepetException ### Transform amino acid query coord in an align format to nucleotide coord ### according to the frame specified at the end of seqName # class TransformAACoordIntoNtCoordInAlignFormat( object ): def __init__(self): self._inFileName = None self._clean = False self._outFileName = None self._consensusFileName = None self._IsFiltered = True ## read input file, transform it and write the output file # def run(self): alignUtils = AlignListUtils() listAlignInstance = alignUtils.read(self._inFileName) self.transformQueryCoord(listAlignInstance) #self.getOriginalQueryNameForAlignList(listAlignInstance) if self._IsFiltered: alignUtils.filterOnAMinimalScore(listAlignInstance, 0) alignUtils.write(listAlignInstance, self._outFileName) if self._clean: self.clean() ## Transform the amino acid query coord into nucleotides and switch subject coord if the strand is reversed # @param listAlignInstance list of align object instance # def transformQueryCoord(self, listAlignInstance): bioseqList = BioseqUtils.extractBioseqListFromFastaFile( self._consensusFileName ) for alignInstance in listAlignInstance.getList(): try: frame = self.extractFrameFromSeqName(alignInstance) except RepetException, e: raise e previousEnd = alignInstance.range_query.end previousStart = alignInstance.range_query.start alignInstance.range_query.seqname = self._getOriginalQueryNameForAlignInstance(alignInstance) if frame < 4: self._changeStartInAAIntoNtInPositiveFrame(alignInstance, frame, previousStart) self._changeEndInAAIntoNtInPositiveFrame(alignInstance, frame, previousEnd) else: self._checkIfSeqNameIsInDNASeqFile(bioseqList, alignInstance.range_query.seqname) consensusLength = BioseqUtils.getSeqLengthWithSeqName(bioseqList, alignInstance.range_query.seqname) self._changeStartInAAIntoNtInNegativeFrame(alignInstance, frame, consensusLength, previousEnd) self._changeEndInAAIntoNtInNegativeFrame(alignInstance, frame, consensusLength, previousStart) self._invertedSubjectCoord(alignInstance) ## remove the input file # def clean(self): os.remove(self._inFileName) ## set input file name # # @param fileName string name of file # def setInFileName(self, fileName): self._inFileName = fileName ## set output file name # # @param fileName string name of file # def setOutFileName(self, fileName): self._outFileName = fileName ## set consensus file name # # @param fileName string name of file # def setConsensusFileName(self, fileName): self._consensusFileName = fileName ## set is clean will be done # # @param clean boolean clean # def setIsClean(self, clean): self._clean = clean ## get input file name # def getInFileName(self): return self._inFileName ## set is negativ score filter will be done # # @param isFiltered boolean isFiltered # def setIsFiltered(self, isFiltered): self._IsFiltered = isFiltered def _getOriginalQueryNameForAlignInstance(self, alignInstance): return alignInstance.range_query.seqname[0:len(alignInstance.range_query.seqname) - 2] def _invertedSubjectCoord(self, alignInstance): return alignInstance.range_subject.reverse() def _changeEndInAAIntoNtInPositiveFrame(self, alignInstance, frame, previousEnd): alignInstance.range_query.end = 3 * previousEnd + frame - 1 def _changeStartInAAIntoNtInPositiveFrame(self, alignInstance, frame, previousStart): alignInstance.range_query.start = 3 * (previousStart - 1) + frame def _changeEndInAAIntoNtInNegativeFrame(self, alignInstance, frame, consensusLength, previousStart): alignInstance.range_query.end = consensusLength - 3 * (previousStart - 1) - frame + 4 def _changeStartInAAIntoNtInNegativeFrame(self, alignInstance, frame, consensusLength, previousEnd): alignInstance.range_query.start = consensusLength - 3 * (previousEnd - 1) - frame + 2 def extractFrameFromSeqName(self, alignInstance): try: frame = int(alignInstance.range_query.seqname[len(alignInstance.range_query.seqname) - 1]) except ValueError: raise RepetException("Unable to extract frame from sequence name") return frame def _checkIfSeqNameIsInDNASeqFile(self, bioseqList, seqName): isSeqNameInBioseqList = False for bioseq in bioseqList: if seqName == bioseq.header: isSeqNameInBioseqList = True if not isSeqNameInBioseqList: sys.stderr.write("seqName : " + seqName + " is not in the consensus file " + self._consensusFileName + "\n") sys.exit(1)