diff smart_toolShed/commons/core/parsing/test/Test_VarscanFileForGnpSNP.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox that manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/smart_toolShed/commons/core/parsing/test/Test_VarscanFileForGnpSNP.py	Thu Jan 17 10:52:14 2013 -0500
@@ -0,0 +1,305 @@
+import unittest
+import os
+
+from commons.core.parsing.VarscanFileForGnpSNP import VarscanFileForGnpSNP
+from commons.core.parsing.VarscanHitForGnpSNP import VarscanHitForGnpSNP
+
+class Test_VarscanFileForGnpSNP(unittest.TestCase):
+
+    def test__init__(self):
+        """Check the state exposed by the accessors right after construction.
+
+        The fastq, reference-fasta and taxon names passed to the constructor
+        must come back unchanged; the field separator is expected to be a tab
+        and the list of hits to be empty.
+        """
+        expFastqFileName = "SR.fastq"
+        expRefFastaFileName = "ref.fasta"
+        expTaxonName = "Arabidopsis thaliana"
+        expVarscanFieldSeparator = "\t"
+        expVarscanHitsList = []
+
+        iVarscanFileForGnpSNP = VarscanFileForGnpSNP("", expFastqFileName, expRefFastaFileName, expTaxonName)
+
+        # assertEqual is used instead of the assertEquals alias, which is
+        # deprecated and was removed from unittest in Python 3.12.
+        self.assertEqual(expFastqFileName, iVarscanFileForGnpSNP.getFastqFileName())
+        self.assertEqual(expRefFastaFileName, iVarscanFileForGnpSNP.getRefFastaFileName())
+        self.assertEqual(expTaxonName, iVarscanFileForGnpSNP.getTaxonName())
+        self.assertEqual(expVarscanFieldSeparator, iVarscanFileForGnpSNP.getVarscanFieldSeparator())
+        self.assertEqual(expVarscanHitsList, iVarscanFileForGnpSNP.getVarscanHitsList())
+
+    def test_parse(self):
+        """parse() must build one fully populated hit per data line.
+
+        The fixture written by _writeVarscanFile() holds two substitutions on
+        the same chromosome (positions 32 and 34). Each expected hit carries
+        the raw column values as strings plus the GnpSNP-specific fields.
+        """
+        varscanFileName = "varscan.tab"
+
+        varscanHit1 = VarscanHitForGnpSNP()
+        varscanHit1.setChrom('C02HBa0291P19_LR48')
+        varscanHit1.setPosition('32')
+        varscanHit1.setRef('C')
+        varscanHit1.setVar('T')
+        varscanHit1.setReads1('1')
+        varscanHit1.setReads2('2')
+        varscanHit1.setVarFreq('66,67%')
+        varscanHit1.setStrands1('1')
+        varscanHit1.setStrands2('1')
+        varscanHit1.setQual1('37')
+        varscanHit1.setQual2('35')
+        varscanHit1.setPvalue('0.3999999999999999')
+        varscanHit1.setGnpSNPRef("C")
+        varscanHit1.setGnpSNPVar("T")
+        varscanHit1.setGnpSNPPosition(32)
+        varscanHit1.setOccurrence(1)
+        varscanHit1.setPolymType("SNP")
+        varscanHit1.setPolymLength(1)
+
+        varscanHit2 = VarscanHitForGnpSNP()
+        varscanHit2.setChrom('C02HBa0291P19_LR48')
+        varscanHit2.setPosition('34')
+        varscanHit2.setRef('A')
+        varscanHit2.setVar('T')
+        varscanHit2.setReads1('1')
+        varscanHit2.setReads2('2')
+        varscanHit2.setVarFreq('66,67%')
+        varscanHit2.setStrands1('1')
+        varscanHit2.setStrands2('1')
+        varscanHit2.setQual1('40')
+        varscanHit2.setQual2('34')
+        varscanHit2.setPvalue('0.3999999999999999')
+        varscanHit2.setGnpSNPRef("A")
+        varscanHit2.setGnpSNPVar("T")
+        varscanHit2.setGnpSNPPosition(34)
+        varscanHit2.setOccurrence(1)
+        varscanHit2.setPolymType("SNP")
+        varscanHit2.setPolymLength(1)
+        expVarscanHitsList = [varscanHit1, varscanHit2]
+
+        self._writeVarscanFile(varscanFileName)
+        try:
+            iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName, '', '', '')
+            iVarscanFileForGnpSNP.parse()
+            obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()
+        finally:
+            # Remove the fixture even when parsing raises, so a failing run
+            # does not leave "varscan.tab" behind for the next test.
+            os.remove(varscanFileName)
+
+        # assertEqual: the assertEquals alias was removed in Python 3.12.
+        self.assertEqual(expVarscanHitsList, obsVarscanHitsList)
+    
+    def test_parse_with_same_position_and_chr_and_type(self):
+        """The second hit at an already seen chromosome/position gets occurrence 2.
+
+        The fixture written by _writeVarscanFile_2() holds two single-base
+        substitutions at position 32 of the same chromosome. Only the
+        occurrence of the second parsed hit is checked.
+        """
+        varscanFileName = "varscan.tab"
+        expVarscanHitsOccurence = 2
+
+        self._writeVarscanFile_2(varscanFileName)
+        try:
+            iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName, '', '', '')
+            iVarscanFileForGnpSNP.parse()
+            obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()
+        finally:
+            # Remove the fixture even when parsing raises.
+            os.remove(varscanFileName)
+
+        # Read the value through the public accessor rather than the private
+        # _occurrence attribute, as the other occurrence tests do.
+        obsVarscanHitsOccurence = obsVarscanHitsList[1].getOccurrence()
+        self.assertEqual(expVarscanHitsOccurence, obsVarscanHitsOccurence)
+        
+    def test_parse_with_same_position_and_chr_and_different_type(self):
+        """Hits of different types at one position are numbered independently.
+
+        The fixture written by _writeVarscanFile_3() holds "C -> T" and
+        "C -> +A" at position 32 of the same chromosome. The second parsed hit
+        is expected to keep occurrence 1 (the "+A" variant is presumably
+        classified as another polymorphism type than the substitution).
+        """
+        varscanFileName = "varscan.tab"
+        expVarscanHitsOccurence = 1
+
+        self._writeVarscanFile_3(varscanFileName)
+        try:
+            iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName)
+            iVarscanFileForGnpSNP.parse()
+            obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()
+        finally:
+            # Remove the fixture even when parsing raises.
+            os.remove(varscanFileName)
+
+        obsVarscanHitsOccurence = obsVarscanHitsList[1].getOccurrence()
+        # assertEqual: the assertEquals alias was removed in Python 3.12.
+        self.assertEqual(expVarscanHitsOccurence, obsVarscanHitsOccurence)
+        
+    def test_parse_on_occurence(self):
+        """Occurrence numbering over a file mixing positions and chromosomes.
+
+        Data lines written by _writeVarscanFile_4(), in order, with the
+        occurrence expected for each parsed hit:
+        seqname/2 -> 1, seqname/4 -> 1, seqname/4 -> 2, seqname/8 -> 1,
+        chrom/4 -> 1, chrom/4 -> 2.
+        """
+        varscanFileName = "varscan.tab"
+        expOccurrences = [1, 1, 2, 1, 1, 2]
+
+        self._writeVarscanFile_4(varscanFileName)
+        try:
+            iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName)
+            iVarscanFileForGnpSNP.parse()
+            obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()
+        finally:
+            # Remove the fixture even when parsing raises.
+            os.remove(varscanFileName)
+
+        # Only the first six hits are inspected; comparing them as one list
+        # reports every mismatching occurrence in a single failure message.
+        obsOccurrences = [hit.getOccurrence() for hit in obsVarscanHitsList[:len(expOccurrences)]]
+        self.assertEqual(expOccurrences, obsOccurrences)
+        
+    def test__eq__notEqual(self):
+        """Instances differing only by their fastq file name must not compare equal."""
+        sharedRefFasta = "ref.fasta"
+        sharedTaxon = "Arabidopsis thaliana"
+
+        leftFile = VarscanFileForGnpSNP("", "SR.fastq", sharedRefFasta, sharedTaxon)
+        rightFile = VarscanFileForGnpSNP("", "SR.fastq2", sharedRefFasta, sharedTaxon)
+
+        # The comparison is written with "==" on purpose: it is __eq__ that
+        # is under test here, not "!=".
+        comparedEqual = leftFile == rightFile
+        self.assertFalse(comparedEqual)
+        
+    def test__eq__equal(self):
+        """Instances built from the same names and equal hit lists must compare equal."""
+        def buildHit():
+            # Returns a fresh hit each call, so the two files hold distinct
+            # but equal objects.
+            hit = VarscanHitForGnpSNP()
+            hit.setChrom('C02HBa0291P19_LR48')
+            hit.setPosition('34')
+            hit.setRef('A')
+            hit.setVar('T')
+            hit.setReads1('1')
+            hit.setReads2('2')
+            hit.setVarFreq('66,67%')
+            hit.setStrands1('1')
+            hit.setStrands2('1')
+            hit.setQual1('40')
+            hit.setQual2('34')
+            hit.setPvalue('0.3999999999999999')
+            return hit
+
+        builtFiles = []
+        for _ in range(2):
+            hitsOfThisFile = [buildHit()]
+            varscanFile = VarscanFileForGnpSNP("", "SR.fastq", "ref.fasta", "Arabidopsis thaliana")
+            varscanFile.setVarscanHitsList(hitsOfThisFile)
+            builtFiles.append(varscanFile)
+
+        firstFile, secondFile = builtFiles
+        self.assertTrue(firstFile == secondFile)
+    
+    def _writeVarscanFile(self, varscanFileName):
+        """Write a Varscan fixture: the header line plus two data lines.
+
+        Both data lines are on chromosome C02HBa0291P19_LR48, at positions
+        32 (C -> T) and 34 (A -> T).
+
+        varscanFileName -- path of the file to create; an existing file is
+        overwritten.
+        """
+        # "with" closes the handle even if one of the writes raises.
+        with open(varscanFileName, 'w') as varscanFile:
+            varscanFile.write("Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\n")
+            varscanFile.write("C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
+            varscanFile.write("C02HBa0291P19_LR48\t34\tA\tT\t1\t2\t66,67%\t1\t1\t40\t34\t0.3999999999999999\n")
+    
+    def _writeVarscanFile_2(self, varscanFileName):
+        """Write a Varscan fixture with two data lines at the same position.
+
+        Both data lines are on chromosome C02HBa0291P19_LR48 at position 32:
+        C -> T, then A -> T.
+
+        varscanFileName -- path of the file to create; an existing file is
+        overwritten.
+        """
+        # "with" closes the handle even if one of the writes raises.
+        with open(varscanFileName, 'w') as varscanFile:
+            varscanFile.write("Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\n")
+            varscanFile.write("C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
+            varscanFile.write("C02HBa0291P19_LR48\t32\tA\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
+        
+    def _writeVarscanFile_3(self, varscanFileName):
+        """Write a Varscan fixture with two different variants at one position.
+
+        Both data lines are on chromosome C02HBa0291P19_LR48 at position 32:
+        C -> T, then C -> +A.
+
+        varscanFileName -- path of the file to create; an existing file is
+        overwritten.
+        """
+        # "with" closes the handle even if one of the writes raises.
+        with open(varscanFileName, 'w') as varscanFile:
+            varscanFile.write("Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\n")
+            varscanFile.write("C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
+            varscanFile.write("C02HBa0291P19_LR48\t32\tC\t+A\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
+
+    def _writeVarscanFile_4(self, varscanFileName):
+        """Write a Varscan fixture mixing repeated positions and two chromosomes.
+
+        After the header come six data lines, in this order: "seqname" at
+        positions 2, 4, 4 and 8, then "chrom" at positions 4 and 4.
+
+        varscanFileName -- path of the file to create; an existing file is
+        overwritten.
+        """
+        # "with" closes the handle even if one of the writes raises.
+        with open(varscanFileName, 'w') as varscanFile:
+            varscanFile.write("Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\n")
+            varscanFile.write("seqname\t2\tA\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
+            varscanFile.write("seqname\t4\tC\tG\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
+            varscanFile.write("seqname\t4\tC\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
+            varscanFile.write("seqname\t8\tT\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
+            varscanFile.write("chrom\t4\tC\tG\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
+            varscanFile.write("chrom\t4\tC\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n")
+if __name__ == "__main__":
+    # Run every test of this module when the file is executed as a script.
+    unittest.main()
\ No newline at end of file