view smart_toolShed/commons/core/parsing/test/Test_VarscanFileForGnpSNP.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox that manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line source

import unittest
import os

from commons.core.parsing.VarscanFileForGnpSNP import VarscanFileForGnpSNP
from commons.core.parsing.VarscanHitForGnpSNP import VarscanHitForGnpSNP

class Test_VarscanFileForGnpSNP(unittest.TestCase):

    def test__init__(self):
        expFastqFileName = "SR.fastq"
        expRefFastaFileName = "ref.fasta"
        expTaxonName = "Arabidopsis thaliana"
        expVarscanFieldSeparator = "\t"
        expVarscanHitsList = []
        
        iVarscanFileForGnpSNP = VarscanFileForGnpSNP("", expFastqFileName, expRefFastaFileName, expTaxonName)
        
        obsFastaqFileName = iVarscanFileForGnpSNP.getFastqFileName()
        obsRefFastaFileName = iVarscanFileForGnpSNP.getRefFastaFileName()
        obsTaxonName = iVarscanFileForGnpSNP.getTaxonName()
        obsVarscanFieldSeparator = iVarscanFileForGnpSNP.getVarscanFieldSeparator()
        obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()
        
        self.assertEquals(expFastqFileName, obsFastaqFileName)
        self.assertEquals(expRefFastaFileName, obsRefFastaFileName)
        self.assertEquals(expTaxonName, obsTaxonName)
        self.assertEquals(expVarscanFieldSeparator, obsVarscanFieldSeparator)
        self.assertEquals(expVarscanHitsList, obsVarscanHitsList)

    def test_parse(self):
        varscanFileName = "varscan.tab"
        self._writeVarscanFile(varscanFileName)
        
        varscanHit1 = VarscanHitForGnpSNP()
        varscanHit1.setChrom('C02HBa0291P19_LR48')
        varscanHit1.setPosition('32')
        varscanHit1.setRef('C')
        varscanHit1.setVar('T')
        varscanHit1.setReads1('1')
        varscanHit1.setReads2('2')
        varscanHit1.setVarFreq('66,67%')
        varscanHit1.setStrands1('1')
        varscanHit1.setStrands2('1')
        varscanHit1.setQual1('37')
        varscanHit1.setQual2('35')
        varscanHit1.setPvalue('0.3999999999999999')
        varscanHit1.setGnpSNPRef("C")
        varscanHit1.setGnpSNPVar("T")
        varscanHit1.setGnpSNPPosition(32)
        varscanHit1.setOccurrence(1)
        varscanHit1.setPolymType("SNP")
        varscanHit1.setPolymLength(1)
        
        varscanHit2 = VarscanHitForGnpSNP()
        varscanHit2.setChrom('C02HBa0291P19_LR48')
        varscanHit2.setPosition('34')
        varscanHit2.setRef('A')
        varscanHit2.setVar('T')
        varscanHit2.setReads1('1')
        varscanHit2.setReads2('2')
        varscanHit2.setVarFreq('66,67%')
        varscanHit2.setStrands1('1')
        varscanHit2.setStrands2('1')
        varscanHit2.setQual1('40')
        varscanHit2.setQual2('34')
        varscanHit2.setPvalue('0.3999999999999999')
        varscanHit2.setGnpSNPRef("A")
        varscanHit2.setGnpSNPVar("T")
        varscanHit2.setGnpSNPPosition(34)
        varscanHit2.setOccurrence(1)
        varscanHit2.setPolymType("SNP")
        varscanHit2.setPolymLength(1)
        expVarscanHitsList = [varscanHit1, varscanHit2]
        
        iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName, '', '', '')
        iVarscanFileForGnpSNP.parse()
        obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()
        os.remove(varscanFileName)
        
        self.assertEquals(expVarscanHitsList, obsVarscanHitsList)
    
    def test_parse_with_same_position_and_chr_and_type(self):
        varscanFileName = "varscan.tab"
        self._writeVarscanFile_2(varscanFileName)
        
        varscanHit1 = VarscanHitForGnpSNP()
        varscanHit1.setChrom('C02HBa0291P19_LR48')
        varscanHit1.setPosition('32')
        varscanHit1.setRef('C')
        varscanHit1.setVar('T')
        varscanHit1.setReads1('1')
        varscanHit1.setReads2('2')
        varscanHit1.setVarFreq('66,67%')
        varscanHit1.setStrands1('1')
        varscanHit1.setStrands2('1')
        varscanHit1.setQual1('37')
        varscanHit1.setQual2('35')
        varscanHit1.setPvalue('0.3999999999999999')
        varscanHit1.setOccurrence(1)
        varscanHit1._polymType = "SNP"
        varscanHit1._gnpSnp_position = 32
        varscanHit1._gnpSnp_ref = "C"
        varscanHit1._gnpSnp_var = "T"
        
        varscanHit2 = VarscanHitForGnpSNP()
        varscanHit2.setChrom('C02HBa0291P19_LR48')
        varscanHit2.setPosition('32')
        varscanHit2.setRef('C')
        varscanHit2.setVar('A')
        varscanHit2.setReads1('1')
        varscanHit2.setReads2('2')
        varscanHit2.setVarFreq('66,67%')
        varscanHit2.setStrands1('1')
        varscanHit2.setStrands2('1')
        varscanHit2.setQual1('37')
        varscanHit2.setQual2('35')
        varscanHit2.setPvalue('0.3999999999999999')
        varscanHit2.setOccurrence(2)
        varscanHit2._polymType = "SNP"
        varscanHit2._gnpSnp_position = 32
        varscanHit2._gnpSnp_ref = "C"
        varscanHit2._gnpSnp_var = "T"
        
        expVarscanHitsOccurence = varscanHit2._occurrence
        
        iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName, '', '', '')
        iVarscanFileForGnpSNP.parse()
        obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()
        obsVarscanHitsOccurence = obsVarscanHitsList[1]._occurrence
        os.remove(varscanFileName)
        
        self.assertEquals(expVarscanHitsOccurence, obsVarscanHitsOccurence)    
        
    def test_parse_with_same_position_and_chr_and_different_type(self):
        varscanFileName = "varscan.tab"
        self._writeVarscanFile_3(varscanFileName)
        
        varscanHit1 = VarscanHitForGnpSNP()
        varscanHit1.setChrom('C02HBa0291P19_LR48')
        varscanHit1.setPosition('32')
        varscanHit1.setRef('C')
        varscanHit1.setVar('T')
        varscanHit1.setReads1('1')
        varscanHit1.setReads2('2')
        varscanHit1.setVarFreq('66,67%')
        varscanHit1.setStrands1('1')
        varscanHit1.setStrands2('1')
        varscanHit1.setQual1('37')
        varscanHit1.setQual2('35')
        varscanHit1.setPvalue('0.3999999999999999')
        varscanHit1.setOccurrence(1)
        varscanHit1._polymType = "SNP"
        varscanHit1._gnpSnp_position = 32
        varscanHit1._gnpSnp_ref = "C"
        varscanHit1._gnpSnp_var = "T"
        
        varscanHit2 = VarscanHitForGnpSNP()
        varscanHit2.setChrom('C02HBa0291P19_LR48')
        varscanHit2.setPosition('32')
        varscanHit2.setRef('C')
        varscanHit2.setVar('+A')
        varscanHit2.setReads1('1')
        varscanHit2.setReads2('2')
        varscanHit2.setVarFreq('66,67%')
        varscanHit2.setStrands1('1')
        varscanHit2.setStrands2('1')
        varscanHit2.setQual1('37')
        varscanHit2.setQual2('35')
        varscanHit2.setPvalue('0.3999999999999999')
        varscanHit2.setOccurrence(1)
        varscanHit2._polymType = "SNP"
        varscanHit2._gnpSnp_position = 32
        varscanHit2._gnpSnp_ref = "C"
        varscanHit2._gnpSnp_var = "T"
        
        expVarscanHitsOccurence = varscanHit2._occurrence
        
        iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName)
        iVarscanFileForGnpSNP.parse()
        obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()
        obsVarscanHitsOccurence = obsVarscanHitsList[1].getOccurrence()
        os.remove(varscanFileName)
        
        self.assertEquals(expVarscanHitsOccurence, obsVarscanHitsOccurence)    
        
    def test_parse_on_occurence(self):
        varscanFileName = "varscan.tab"
        self._writeVarscanFile_4(varscanFileName)
        
        expOccurrence1 = 1
        expOccurrence2 = 1
        expOccurrence3 = 2
        expOccurrence4 = 1
        expOccurrence5 = 1
        expOccurrence6 = 2
        
        iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName)
        iVarscanFileForGnpSNP.parse()
        obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()
        obsOccurrence1 = obsVarscanHitsList[0].getOccurrence()
        obsOccurrence2 = obsVarscanHitsList[1].getOccurrence()
        obsOccurrence3 = obsVarscanHitsList[2].getOccurrence()
        obsOccurrence4 = obsVarscanHitsList[3].getOccurrence()
        obsOccurrence5 = obsVarscanHitsList[4].getOccurrence()
        obsOccurrence6 = obsVarscanHitsList[5].getOccurrence()
        os.remove(varscanFileName)
        
        self.assertEquals(expOccurrence1, obsOccurrence1)
        self.assertEquals(expOccurrence2, obsOccurrence2)
        self.assertEquals(expOccurrence3, obsOccurrence3)
        self.assertEquals(expOccurrence4, obsOccurrence4)
        self.assertEquals(expOccurrence5, obsOccurrence5)
        self.assertEquals(expOccurrence6, obsOccurrence6)
        
    def test__eq__notEqual(self):
        fastqFileName = "SR.fastq"
        refFastaFileName = "ref.fasta"
        taxonName = "Arabidopsis thaliana"
        
        iVarscanFileForGnpSNP1 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)
        
        fastqFileName = "SR.fastq2"
        refFastaFileName = "ref.fasta"
        taxonName = "Arabidopsis thaliana"
        
        iVarscanFileForGnpSNP2 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)

        self.assertFalse(iVarscanFileForGnpSNP1 == iVarscanFileForGnpSNP2)
        
    def test__eq__equal(self):
        fastqFileName = "SR.fastq"
        refFastaFileName = "ref.fasta"
        taxonName = "Arabidopsis thaliana"
        
        varscanHit1 = VarscanHitForGnpSNP()
        varscanHit1.setChrom('C02HBa0291P19_LR48')
        varscanHit1.setPosition('34')
        varscanHit1.setRef('A')
        varscanHit1.setVar('T')
        varscanHit1.setReads1('1')
        varscanHit1.setReads2('2')
        varscanHit1.setVarFreq('66,67%')
        varscanHit1.setStrands1('1')
        varscanHit1.setStrands2('1')
        varscanHit1.setQual1('40')
        varscanHit1.setQual2('34')
        varscanHit1.setPvalue('0.3999999999999999')
        lVarscanHits1 = [varscanHit1]
        
        iVarscanFileForGnpSNP1 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)
        iVarscanFileForGnpSNP1.setVarscanHitsList(lVarscanHits1)
        
        varscanHit2 = VarscanHitForGnpSNP()
        varscanHit2.setChrom('C02HBa0291P19_LR48')
        varscanHit2.setPosition('34')
        varscanHit2.setRef('A')
        varscanHit2.setVar('T')
        varscanHit2.setReads1('1')
        varscanHit2.setReads2('2')
        varscanHit2.setVarFreq('66,67%')
        varscanHit2.setStrands1('1')
        varscanHit2.setStrands2('1')
        varscanHit2.setQual1('40')
        varscanHit2.setQual2('34')
        varscanHit2.setPvalue('0.3999999999999999')
        lVarscanHits2 = [varscanHit2]

        iVarscanFileForGnpSNP2 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)
        iVarscanFileForGnpSNP2.setVarscanHitsList(lVarscanHits2)

        self.assertTrue(iVarscanFileForGnpSNP1 == iVarscanFileForGnpSNP2)
    
    def _writeVarscanFile(self, varscanFileName):
        varscanFile = open(varscanFileName, 'w')
        varscanFile.write("Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\n")
        varscanFile.write("C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
        varscanFile.write("C02HBa0291P19_LR48\t34\tA\tT\t1\t2\t66,67%\t1\t1\t40\t34\t0.3999999999999999\n")
        varscanFile.close()
    
    def _writeVarscanFile_2(self, varscanFileName):
        varscanFile = open(varscanFileName, 'w')
        varscanFile.write("Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\n")
        varscanFile.write("C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
        varscanFile.write("C02HBa0291P19_LR48\t32\tA\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
        varscanFile.close()
        
    def _writeVarscanFile_3(self, varscanFileName):
        varscanFile = open(varscanFileName, 'w')
        varscanFile.write("Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\n")
        varscanFile.write("C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
        varscanFile.write("C02HBa0291P19_LR48\t32\tC\t+A\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
        varscanFile.close()

    def _writeVarscanFile_4(self, varscanFileName):
        varscanFile = open(varscanFileName, 'w')
        varscanFile.write("Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\n")
        varscanFile.write("seqname\t2\tA\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
        varscanFile.write("seqname\t4\tC\tG\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
        varscanFile.write("seqname\t4\tC\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
        varscanFile.write("seqname\t8\tT\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
        varscanFile.write("chrom\t4\tC\tG\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
        varscanFile.write("chrom\t4\tC\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
        varscanFile.close()
# Run the tests of this module with the unittest runner when executed as a script.
if __name__ == "__main__":
    unittest.main()