view commons/core/parsing/VarscanHitForGnpSNP.py @ 9:1eb55963fe39

Updated CompareOverlappingSmall*.py
author m-zytnicki
date Thu, 14 Mar 2013 05:23:05 -0400
parents 769e306b7933
children
line wrap: on
line source

# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software.  You can  use, 
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info". 
#
# As a counterpart to the access to the source code and  rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty  and the software's author,  the holder of the
# economic rights,  and the successive licensors  have only  limited
# liability. 
#
# In this respect, the user's attention is drawn to the risks associated
# with loading,  using,  modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean  that it is complicated to manipulate,  and  that  also
# therefore means  that it is reserved for developers  and  experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or 
# data to be ensured and,  more generally, to use and operate it in the 
# same conditions as regards security. 
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.


from commons.core.checker.CheckerException import CheckerException
from commons.core.parsing.VarscanHit import VarscanHit
import re

## Varscan hit extended with the additional fields handled for GnpSNP
#
# On top of what VarscanHit stores, an instance keeps the values read from
# items 4 to 11 of a parsed Varscan line (reads, variant frequency, strands,
# qualities, p-value), the 5' and 3' flanks, and the fields computed by
# formatAlleles2GnpSnp() and manageOccurrence() (GnpSNP alleles and position,
# polymorphism type and length, occurrence counter).
#
class VarscanHitForGnpSNP(VarscanHit):

    ## Constructor: every field starts empty, except the occurrence counter which starts at 1
    #
    def __init__(self):
        VarscanHit.__init__(self)
        self._reads1 = ''
        self._reads2 = ''
        self._varFreq = ''
        self._strands1 = ''
        self._strands2 = ''
        self._qual1 = ''
        self._qual2 = ''
        self._pvalue = ''
        self._5flank = ''
        self._3flank = ''
        self._gnpSnp_ref = ''
        self._gnpSnp_var = ''
        self._gnpSnp_position = 0
        self._polymType = ''
        self._polymLength = 0
        self._occurrence = 1

    ## Equal operator
    #
    # Two hits are equal when VarscanHit.__eq__ accepts them and every field
    # added by this class holds the same value in both.
    #
    # @param o a VarscanHitForGnpSNP instance
    # @return a true value if both hits carry the same values, a false value otherwise
    #
    def __eq__(self, o):
        return VarscanHit.__eq__(self, o) \
             and self._reads1 == o._reads1 and self._reads2 == o._reads2 \
             and self._varFreq == o._varFreq and self._strands1 == o._strands1 \
             and self._strands2 == o._strands2 and self._qual1 == o._qual1 \
             and self._qual2 == o._qual2 and self._pvalue == o._pvalue \
             and self._3flank == o._3flank and self._5flank == o._5flank \
             and self._gnpSnp_position == o._gnpSnp_position and self._gnpSnp_ref == o._gnpSnp_ref \
             and self._gnpSnp_var == o._gnpSnp_var and self._polymLength == o._polymLength \
             and self._polymType == o._polymType and self._occurrence == o._occurrence

    ## Tell whether another hit has the same chromosome, position and polymorphism type
    #
    # @param iVarscanHitForGnpSNP the hit to compare with (read through its getters)
    # @return True if the three values match, False otherwise
    #
    def isPolymTypeAlreadyFoundAtThisChromAndThisPosition(self, iVarscanHitForGnpSNP):
        return self._chrom == iVarscanHitForGnpSNP.getChrom() \
            and self._position == iVarscanHitForGnpSNP.getPosition() \
            and self._polymType == iVarscanHitForGnpSNP.getPolymType()

    ## Update the occurrence counter with respect to another hit
    #
    # When the given hit has the same chromosome, position and polymorphism
    # type, the occurrence of this hit becomes the occurrence of the given hit
    # plus one. Otherwise (or when no hit is given) nothing changes.
    #
    # @param iVarscanHitForGnpSNP the hit to compare with, or None
    #
    def manageOccurrence(self, iVarscanHitForGnpSNP=None):
        # Identity test on purpose: "!= None" may be routed through __eq__,
        # which reads attributes that None does not have.
        if iVarscanHitForGnpSNP is not None and self.isPolymTypeAlreadyFoundAtThisChromAndThisPosition(iVarscanHitForGnpSNP):
            self._occurrence = iVarscanHitForGnpSNP.getOccurrence() + 1

    ## Compute the GnpSNP fields (type, position, alleles, length) from the Varscan alleles
    #
    # - variant containing "-": DELETION, position shifted by one, reference
    #   allele taken from the variant string and variant allele made of dashes
    # - variant containing "+": INSERTION, variant allele taken from the variant
    #   string and reference allele made of dashes
    # - otherwise: SNP, alleles copied as they are
    #
    def formatAlleles2GnpSnp(self):
        if "-" in self.getVar():
            self._polymType = "DELETION"
            self._gnpSnp_position = int(self._position) + 1
            # The first character of the variant is dropped
            # (presumably the "-" sign -- to be confirmed against the Varscan format).
            self._gnpSnp_ref = self._var[1:]
            self._gnpSnp_var = "-" * len(self._gnpSnp_ref)
            self._polymLength = len(self._gnpSnp_ref)
        elif "+" in self.getVar():
            self._polymType = "INSERTION"
            self._gnpSnp_position = int(self._position)
            # Same slicing as for deletions (presumably removes the "+" sign).
            self._gnpSnp_var = self._var[1:]
            self._gnpSnp_ref = "-" * len(self._gnpSnp_var)
            # NOTE(review): the length is fixed to 1 whatever the size of the
            # inserted sequence, unlike deletions -- confirm this is intended.
            self._polymLength = 1
        else:
            self._polymType = "SNP"
            self._gnpSnp_position = int(self._position)
            self._gnpSnp_var = self._var
            self._gnpSnp_ref = self._ref
            self._polymLength = 1

    # ------------------------------------------------------------------
    # Setters. Values are stored as given, except in setVarFreq().
    # ------------------------------------------------------------------

    ## @param nbReadsLikeRef number of reads like the reference
    def setReads1(self, nbReadsLikeRef):
        self._reads1 = nbReadsLikeRef

    ## @param nbReadsLikeVar number of reads like the variant
    def setReads2(self, nbReadsLikeVar):
        self._reads2 = nbReadsLikeVar

    ## Store the frequency of the variant allele as a float
    #
    # Any "%" is removed and "," is replaced by "." before the conversion.
    #
    # @param frequencyOfVariantAllele string such as "12,5%"
    # @exception ValueError if the cleaned string is not a valid float
    #
    def setVarFreq(self, frequencyOfVariantAllele):
        frequencyOfVariantAllele = frequencyOfVariantAllele.replace("%","")
        frequencyOfVariantAllele = frequencyOfVariantAllele.replace(",",".")
        self._varFreq = float(frequencyOfVariantAllele)

    ## @param strandsOfReferenceAllele strands of the reference allele
    def setStrands1(self, strandsOfReferenceAllele):
        self._strands1 = strandsOfReferenceAllele

    ## @param strandsOfVariantAllele strands of the variant allele
    def setStrands2(self, strandsOfVariantAllele):
        self._strands2 = strandsOfVariantAllele

    ## @param averageQualityOfRef average quality of the reference
    def setQual1(self, averageQualityOfRef):
        self._qual1 = averageQualityOfRef

    ## @param averageQualityOfVar average quality of the variant
    def setQual2(self, averageQualityOfVar):
        self._qual2 = averageQualityOfVar

    def setPvalue(self, pvalue):
        self._pvalue = pvalue

    def set5flank(self, s5flank):
        self._5flank = s5flank

    def set3flank(self, s3flank):
        self._3flank = s3flank

    def setGnpSNPRef(self, ref):
        self._gnpSnp_ref = ref

    def setGnpSNPVar(self, var):
        self._gnpSnp_var = var

    def setGnpSNPPosition(self, position):
        self._gnpSnp_position = position

    def setOccurrence(self, occurrence):
        self._occurrence = occurrence

    def setPolymType(self, polymType):
        self._polymType = polymType

    def setPolymLength(self, polymLength):
        self._polymLength = polymLength

    # ------------------------------------------------------------------
    # Getters. Each one returns the stored value unchanged.
    # ------------------------------------------------------------------

    def getReads1(self):
        return self._reads1

    def getReads2(self):
        return self._reads2

    def getVarFreq(self):
        return self._varFreq

    def getStrands1(self):
        return self._strands1

    def getStrands2(self):
        return self._strands2

    def getQual1(self):
        return self._qual1

    def getQual2(self):
        return self._qual2

    def getPvalue(self):
        return self._pvalue

    def get5flank(self):
        return self._5flank

    def get3flank(self):
        return self._3flank

    def getPolymType(self):
        return self._polymType

    def getGnpSnpVar(self):
        return self._gnpSnp_var

    def getGnpSnpRef(self):
        return self._gnpSnp_ref

    def getGnpSnpPosition(self):
        return self._gnpSnp_position

    def getPolymLength(self):
        return self._polymLength

    def getOccurrence(self):
        return self._occurrence

    ## Store a mandatory field through its setter, or fail if it is empty
    #
    # @param value the field as read from the Varscan line
    # @param fieldName name of the field, used in the error message
    # @param setter the bound setter to call with the value
    # @param iCurrentLineNumber line number, used in the error message
    # @exception CheckerException if the value is the empty string
    #
    def _setMandatoryField(self, value, fieldName, setter, iCurrentLineNumber):
        if value != '':
            setter(value)
        else:
            raise CheckerException("The field %s is empty in varscan file in line %s" % (fieldName, iCurrentLineNumber))

    ## Fill the instance from the fields of one parsed Varscan line
    #
    # VarscanHit.setAttributes() is called first, then items 4 to 11 of
    # lResults are checked and stored in this order: Reads1, Reads2, VarFreq,
    # Strands1, Strands2, Qual1, Qual2, Pvalue. The fields located before a
    # faulty one are already stored when the exception is raised.
    #
    # @param lResults list of the fields of the line (items 4 to 11 are read here)
    # @param iCurrentLineNumber line number, only used in the error messages
    # @exception CheckerException if one of these fields is empty, or if VarFreq
    #            cannot be converted to a number
    #
    def setAttributes(self, lResults, iCurrentLineNumber):
        VarscanHit.setAttributes(self, lResults, iCurrentLineNumber)
        self._setMandatoryField(lResults[4], "Reads1", self.setReads1, iCurrentLineNumber)
        self._setMandatoryField(lResults[5], "Reads2", self.setReads2, iCurrentLineNumber)

        varFreqMessage = "The field VarFreq is empty or in bad format in varscan file in line %s" % (iCurrentLineNumber)
        # re.match only anchors the beginning of the string: this accepts any
        # value starting with a digit, a comma or a percent sign.
        if lResults[6] != '' and re.match(r"[0-9,%]+", lResults[6]):
            try:
                self.setVarFreq(lResults[6])
            except ValueError:
                # The prefix check above lets values such as "%" or "12abc"
                # through; report them as a format error, not a bare ValueError.
                raise CheckerException(varFreqMessage)
        else:
            raise CheckerException(varFreqMessage)

        self._setMandatoryField(lResults[7], "Strands1", self.setStrands1, iCurrentLineNumber)
        self._setMandatoryField(lResults[8], "Strands2", self.setStrands2, iCurrentLineNumber)
        self._setMandatoryField(lResults[9], "Qual1", self.setQual1, iCurrentLineNumber)
        self._setMandatoryField(lResults[10], "Qual2", self.setQual2, iCurrentLineNumber)
        self._setMandatoryField(lResults[11], "Pvalue", self.setPvalue, iCurrentLineNumber)