Mercurial > repos > yufei-luo > s_mart
diff smart_toolShed/commons/core/parsing/test/Test_VarscanFileForGnpSNP.py @ 0:e0f8dcca02ed
Uploaded S-MART tool, a toolbox that manages RNA-Seq and ChIP-Seq data.
author | yufei-luo |
---|---|
date | Thu, 17 Jan 2013 10:52:14 -0500 |
parents | |
children |
line wrap: on
line diff
"""Unit tests for VarscanFileForGnpSNP."""

import unittest
import os

from commons.core.parsing.VarscanFileForGnpSNP import VarscanFileForGnpSNP
from commons.core.parsing.VarscanHitForGnpSNP import VarscanHitForGnpSNP


class Test_VarscanFileForGnpSNP(unittest.TestCase):
    """Exercise construction, parsing and equality of VarscanFileForGnpSNP.

    Parsing tests write a small tab-separated fixture file into the current
    working directory, parse it, and always remove it afterwards.
    """

    # Name of the temporary fixture file written by the parsing tests.
    VARSCAN_FILE_NAME = "varscan.tab"

    # Column header shared by every fixture file.
    VARSCAN_HEADER = "Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\n"

    def test__init__(self):
        """Constructor arguments are returned unchanged by the getters."""
        expFastqFileName = "SR.fastq"
        expRefFastaFileName = "ref.fasta"
        expTaxonName = "Arabidopsis thaliana"
        expVarscanFieldSeparator = "\t"
        expVarscanHitsList = []

        iVarscanFileForGnpSNP = VarscanFileForGnpSNP("", expFastqFileName, expRefFastaFileName, expTaxonName)

        obsFastqFileName = iVarscanFileForGnpSNP.getFastqFileName()
        obsRefFastaFileName = iVarscanFileForGnpSNP.getRefFastaFileName()
        obsTaxonName = iVarscanFileForGnpSNP.getTaxonName()
        obsVarscanFieldSeparator = iVarscanFileForGnpSNP.getVarscanFieldSeparator()
        obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()

        self.assertEqual(expFastqFileName, obsFastqFileName)
        self.assertEqual(expRefFastaFileName, obsRefFastaFileName)
        self.assertEqual(expTaxonName, obsTaxonName)
        self.assertEqual(expVarscanFieldSeparator, obsVarscanFieldSeparator)
        self.assertEqual(expVarscanHitsList, obsVarscanHitsList)

    def test_parse(self):
        """Two hits at different positions are parsed into two equal hit objects."""
        varscanHit1 = self._buildHit('32', 'C', 'T', '37', '35')
        varscanHit1.setGnpSNPRef("C")
        varscanHit1.setGnpSNPVar("T")
        varscanHit1.setGnpSNPPosition(32)
        varscanHit1.setOccurrence(1)
        varscanHit1.setPolymType("SNP")
        varscanHit1.setPolymLength(1)

        varscanHit2 = self._buildHit('34', 'A', 'T', '40', '34')
        varscanHit2.setGnpSNPRef("A")
        varscanHit2.setGnpSNPVar("T")
        varscanHit2.setGnpSNPPosition(34)
        varscanHit2.setOccurrence(1)
        varscanHit2.setPolymType("SNP")
        varscanHit2.setPolymLength(1)
        expVarscanHitsList = [varscanHit1, varscanHit2]

        obsVarscanHitsList = self._parseVarscanFile(self._writeVarscanFile, '', '', '')

        self.assertEqual(expVarscanHitsList, obsVarscanHitsList)

    def test_parse_with_same_position_and_chr_and_type(self):
        """The second hit of the fixture is expected to carry occurrence 2."""
        # NOTE(review): this expected hit (Ref C / Var A) does not match the
        # second fixture row (Ref A / Var T); only the occurrence is compared,
        # so the mismatch is kept as in the original test - confirm intent.
        varscanHit2 = self._buildHit('32', 'C', 'A', '37', '35')
        varscanHit2.setOccurrence(2)
        varscanHit2._polymType = "SNP"
        varscanHit2._gnpSnp_position = 32
        varscanHit2._gnpSnp_ref = "C"
        varscanHit2._gnpSnp_var = "T"

        expVarscanHitsOccurence = varscanHit2._occurrence

        obsVarscanHitsList = self._parseVarscanFile(self._writeVarscanFile_2, '', '', '')
        obsVarscanHitsOccurence = obsVarscanHitsList[1]._occurrence

        self.assertEqual(expVarscanHitsOccurence, obsVarscanHitsOccurence)

    def test_parse_with_same_position_and_chr_and_different_type(self):
        """The second hit of the fixture (Var '+A') is expected to carry occurrence 1."""
        varscanHit2 = self._buildHit('32', 'C', '+A', '37', '35')
        varscanHit2.setOccurrence(1)
        varscanHit2._polymType = "SNP"
        varscanHit2._gnpSnp_position = 32
        varscanHit2._gnpSnp_ref = "C"
        varscanHit2._gnpSnp_var = "T"

        expVarscanHitsOccurence = varscanHit2._occurrence

        obsVarscanHitsList = self._parseVarscanFile(self._writeVarscanFile_3)
        obsVarscanHitsOccurence = obsVarscanHitsList[1].getOccurrence()

        self.assertEqual(expVarscanHitsOccurence, obsVarscanHitsOccurence)

    def test_parse_on_occurence(self):
        """Check the occurrence of each of the six hits of the fourth fixture."""
        expOccurrences = [1, 1, 2, 1, 1, 2]

        obsVarscanHitsList = self._parseVarscanFile(self._writeVarscanFile_4)
        # Only the first six hits are inspected, as in the original assertions.
        obsOccurrences = [obsVarscanHitsList[index].getOccurrence()
                          for index in range(len(expOccurrences))]

        self.assertEqual(expOccurrences, obsOccurrences)

    def test__eq__notEqual(self):
        """Instances built with different fastq file names compare unequal."""
        refFastaFileName = "ref.fasta"
        taxonName = "Arabidopsis thaliana"

        iVarscanFileForGnpSNP1 = VarscanFileForGnpSNP("", "SR.fastq", refFastaFileName, taxonName)
        iVarscanFileForGnpSNP2 = VarscanFileForGnpSNP("", "SR.fastq2", refFastaFileName, taxonName)

        self.assertFalse(iVarscanFileForGnpSNP1 == iVarscanFileForGnpSNP2)

    def test__eq__equal(self):
        """Instances built identically and holding equal hit lists compare equal."""
        fastqFileName = "SR.fastq"
        refFastaFileName = "ref.fasta"
        taxonName = "Arabidopsis thaliana"

        lVarscanHits1 = [self._buildHit('34', 'A', 'T', '40', '34')]
        iVarscanFileForGnpSNP1 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)
        iVarscanFileForGnpSNP1.setVarscanHitsList(lVarscanHits1)

        lVarscanHits2 = [self._buildHit('34', 'A', 'T', '40', '34')]
        iVarscanFileForGnpSNP2 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)
        iVarscanFileForGnpSNP2.setVarscanHitsList(lVarscanHits2)

        self.assertTrue(iVarscanFileForGnpSNP1 == iVarscanFileForGnpSNP2)

    def _buildHit(self, position, ref, var, qual1, qual2, chrom='C02HBa0291P19_LR48'):
        """Return a VarscanHitForGnpSNP with the twelve raw Varscan fields set.

        Read counts, variant frequency, strand counts and p-value are the
        constants shared by every fixture row of this test case.
        """
        varscanHit = VarscanHitForGnpSNP()
        varscanHit.setChrom(chrom)
        varscanHit.setPosition(position)
        varscanHit.setRef(ref)
        varscanHit.setVar(var)
        varscanHit.setReads1('1')
        varscanHit.setReads2('2')
        varscanHit.setVarFreq('66,67%')
        varscanHit.setStrands1('1')
        varscanHit.setStrands2('1')
        varscanHit.setQual1(qual1)
        varscanHit.setQual2(qual2)
        varscanHit.setPvalue('0.3999999999999999')
        return varscanHit

    def _parseVarscanFile(self, writeFixture, *ctorArgs):
        """Write a fixture with writeFixture, parse it and return the hit list.

        ctorArgs are passed to VarscanFileForGnpSNP after the file name. The
        fixture file is removed even when writing or parsing raises, so a
        failing test does not leave 'varscan.tab' behind.
        """
        varscanFileName = self.VARSCAN_FILE_NAME
        try:
            writeFixture(varscanFileName)
            iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName, *ctorArgs)
            iVarscanFileForGnpSNP.parse()
            return iVarscanFileForGnpSNP.getVarscanHitsList()
        finally:
            # Guarded so a failure to create the file is not masked by a
            # second error raised from os.remove.
            if os.path.exists(varscanFileName):
                os.remove(varscanFileName)

    def _writeRows(self, varscanFileName, rows):
        """Write the shared header followed by rows to varscanFileName."""
        with open(varscanFileName, 'w') as varscanFile:
            varscanFile.write(self.VARSCAN_HEADER)
            varscanFile.writelines(rows)

    def _writeVarscanFile(self, varscanFileName):
        """Fixture: two hits on the same chromosome at positions 32 and 34."""
        self._writeRows(varscanFileName, [
            "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "C02HBa0291P19_LR48\t34\tA\tT\t1\t2\t66,67%\t1\t1\t40\t34\t0.3999999999999999\n",
        ])

    def _writeVarscanFile_2(self, varscanFileName):
        """Fixture: two hits on the same chromosome, both at position 32."""
        self._writeRows(varscanFileName, [
            "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "C02HBa0291P19_LR48\t32\tA\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
        ])

    def _writeVarscanFile_3(self, varscanFileName):
        """Fixture: two hits at position 32, the second with Var '+A'."""
        self._writeRows(varscanFileName, [
            "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "C02HBa0291P19_LR48\t32\tC\t+A\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
        ])

    def _writeVarscanFile_4(self, varscanFileName):
        """Fixture: six hits over two sequence names, with repeated positions."""
        self._writeRows(varscanFileName, [
            "seqname\t2\tA\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "seqname\t4\tC\tG\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "seqname\t4\tC\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "seqname\t8\tT\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "chrom\t4\tC\tG\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "chrom\t4\tC\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
        ])


if __name__ == "__main__":
    unittest.main()