view SMART/Java/Python/test/Test_F_ClusterizeByTags.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
line wrap: on
line source

import unittest, os, os.path
from SMART.Java.Python.misc import Utils
from SMART.Java.Python.ClusterizeByTags import ClusterizeByTags

class Test_F_ClusterizeByTags(unittest.TestCase):
    """Functional tests of ClusterizeByTags used with the "diff" operation.

    Every test writes a small GFF3 input file and the GFF3 output it expects,
    runs ClusterizeByTags on the "score" tag with a threshold of 20, and
    compares the file produced by the tool with the expected one through
    Utils.diff.
    """

    def setUp(self):
        """Define the names of the temporary files used by each test."""
        self._inputFileName     = "inputFileTest.gff3"
        self._expOutputFileName = "expOutput.gff3"
        self._outputFileName    = "output.gff3"

    def tearDown(self):
        """Remove the temporary files that the test may have created."""
        for fileName in (self._inputFileName, self._expOutputFileName, self._outputFileName):
            if os.path.exists(fileName):
                os.remove(fileName)

    def _writeFile(self, fileName, content):
        """Write content to fileName; the handle is closed even if writing fails."""
        with open(fileName, "w") as handle:
            handle.write(content)

    def _runDiffAndCheck(self, inputContent, expOutputContent, maxDistance=None, oneStrand=False):
        """Run ClusterizeByTags in "diff" mode and check the file it produces.

        inputContent:     GFF3 text written to the input file
        expOutputContent: GFF3 text the tool is expected to write
        maxDistance:      value given to setMaxDistance (None: see the tests)
        oneStrand:        when True, setOneStrand(True) is called before the
                          run; otherwise setOneStrand is not called at all, so
                          the tool keeps its own default
        """
        self._writeFile(self._inputFileName, inputContent)
        self._writeFile(self._expOutputFileName, expOutputContent)
        # NOTE(review): 0 is presumably the verbosity level -- confirm in ClusterizeByTags.
        cbt = ClusterizeByTags(0)
        cbt.setInputFile(self._inputFileName, "gff3")
        cbt.setOutputFile(self._outputFileName)
        cbt.setTag("score", None)
        cbt.setThreshold(20)
        cbt.setOperation("diff")
        cbt.setMaxDistance(maxDistance)
        if oneStrand:
            cbt.setOneStrand(True)
        cbt.run()
        self.assertTrue(Utils.diff(self._outputFileName, self._expOutputFileName))

    def test_diff_simple(self):
        """Scores 10/15/15 and 100/110/105 are expected to form two clusters."""
        self._runDiffAndCheck(
            """chr1\tS-MART\ttest1.1\t100\t200\t.\t+\t.\tName=test1.1;score=10;ID=test1.1
chr1\tS-MART\ttest1.2\t300\t400\t.\t+\t.\tName=test1.2;score=15;ID=test1.2
chr1\tS-MART\ttest1.3\t500\t600\t.\t+\t.\tName=test1.3;score=15;ID=test1.3
chr1\tS-MART\ttest1.4\t700\t800\t.\t+\t.\tName=test1.4;score=100;ID=test1.4
chr1\tS-MART\ttest1.5\t900\t1000\t.\t+\t.\tName=test1.5;score=110;ID=test1.5
chr1\tS-MART\ttest1.6\t1100\t1200\t.\t+\t.\tName=test1.6;score=105;ID=test1.6
""",
            """chr1\tS-MART\ttest1.1\t100\t600\t40\t+\t.\tnbElements=3.000000;ID=test1.1;Name=test1.1--test1.2--test1.3
chr1\tS-MART\texon\t100\t200\t10\t+\t.\tID=test1.1-exon1;Name=test1.1--test1.2--test1.3-exon1;Parent=test1.1
chr1\tS-MART\texon\t300\t400\t15\t+\t.\tID=test1.1-exon2;Name=test1.1--test1.2--test1.3-exon2;Parent=test1.1
chr1\tS-MART\texon\t500\t600\t15\t+\t.\tID=test1.1-exon3;Name=test1.1--test1.2--test1.3-exon3;Parent=test1.1
chr1\tS-MART\ttest1.4\t700\t1200\t315\t+\t.\tnbElements=3.000000;ID=test1.4;Name=test1.4--test1.5--test1.6
chr1\tS-MART\texon\t700\t800\t100\t+\t.\tID=test1.4-exon1;Name=test1.4--test1.5--test1.6-exon1;Parent=test1.4
chr1\tS-MART\texon\t900\t1000\t110\t+\t.\tID=test1.4-exon2;Name=test1.4--test1.5--test1.6-exon2;Parent=test1.4
chr1\tS-MART\texon\t1100\t1200\t105\t+\t.\tID=test1.4-exon3;Name=test1.4--test1.5--test1.6-exon3;Parent=test1.4
""")

    def test_diff_two_strands(self):
        """Same scores on both strands: one cluster per strand, "-" cluster listed first."""
        self._runDiffAndCheck(
            """chr1\tS-MART\ttest1.1\t100\t200\t.\t+\t.\tName=test1.1;score=10;ID=test1.1
chr1\tS-MART\ttest1.2\t300\t400\t.\t+\t.\tName=test1.2;score=15;ID=test1.2
chr1\tS-MART\ttest1.3\t500\t600\t.\t+\t.\tName=test1.3;score=15;ID=test1.3
chr1\tS-MART\ttest1.4\t700\t800\t.\t-\t.\tName=test1.4;score=10;ID=test1.4
chr1\tS-MART\ttest1.5\t900\t1000\t.\t-\t.\tName=test1.5;score=15;ID=test1.5
chr1\tS-MART\ttest1.6\t1100\t1200\t.\t-\t.\tName=test1.6;score=15;ID=test1.6
""",
            """chr1\tS-MART\ttest1.4\t700\t1200\t40\t-\t.\tnbElements=3.000000;ID=test1.4;Name=test1.4--test1.5--test1.6
chr1\tS-MART\texon\t700\t800\t10\t-\t.\tID=test1.4-exon1;Name=test1.4--test1.5--test1.6-exon1;Parent=test1.4
chr1\tS-MART\texon\t900\t1000\t15\t-\t.\tID=test1.4-exon2;Name=test1.4--test1.5--test1.6-exon2;Parent=test1.4
chr1\tS-MART\texon\t1100\t1200\t15\t-\t.\tID=test1.4-exon3;Name=test1.4--test1.5--test1.6-exon3;Parent=test1.4
chr1\tS-MART\ttest1.1\t100\t600\t40\t+\t.\tnbElements=3.000000;ID=test1.1;Name=test1.1--test1.2--test1.3
chr1\tS-MART\texon\t100\t200\t10\t+\t.\tID=test1.1-exon1;Name=test1.1--test1.2--test1.3-exon1;Parent=test1.1
chr1\tS-MART\texon\t300\t400\t15\t+\t.\tID=test1.1-exon2;Name=test1.1--test1.2--test1.3-exon2;Parent=test1.1
chr1\tS-MART\texon\t500\t600\t15\t+\t.\tID=test1.1-exon3;Name=test1.1--test1.2--test1.3-exon3;Parent=test1.1
""")

    def test_diff_one_strands(self):
        """Same input as the two-strand test with setOneStrand(True): "+" cluster listed first."""
        self._runDiffAndCheck(
            """chr1\tS-MART\ttest1.1\t100\t200\t.\t+\t.\tName=test1.1;score=10;ID=test1.1
chr1\tS-MART\ttest1.2\t300\t400\t.\t+\t.\tName=test1.2;score=15;ID=test1.2
chr1\tS-MART\ttest1.3\t500\t600\t.\t+\t.\tName=test1.3;score=15;ID=test1.3
chr1\tS-MART\ttest1.4\t700\t800\t.\t-\t.\tName=test1.4;score=10;ID=test1.4
chr1\tS-MART\ttest1.5\t900\t1000\t.\t-\t.\tName=test1.5;score=15;ID=test1.5
chr1\tS-MART\ttest1.6\t1100\t1200\t.\t-\t.\tName=test1.6;score=15;ID=test1.6
""",
            """chr1\tS-MART\ttest1.1\t100\t600\t40\t+\t.\tnbElements=3.000000;ID=test1.1;Name=test1.1--test1.2--test1.3
chr1\tS-MART\texon\t100\t200\t10\t+\t.\tID=test1.1-exon1;Name=test1.1--test1.2--test1.3-exon1;Parent=test1.1
chr1\tS-MART\texon\t300\t400\t15\t+\t.\tID=test1.1-exon2;Name=test1.1--test1.2--test1.3-exon2;Parent=test1.1
chr1\tS-MART\texon\t500\t600\t15\t+\t.\tID=test1.1-exon3;Name=test1.1--test1.2--test1.3-exon3;Parent=test1.1
chr1\tS-MART\ttest1.4\t700\t1200\t40\t-\t.\tnbElements=3.000000;ID=test1.4;Name=test1.4--test1.5--test1.6
chr1\tS-MART\texon\t700\t800\t10\t-\t.\tID=test1.4-exon1;Name=test1.4--test1.5--test1.6-exon1;Parent=test1.4
chr1\tS-MART\texon\t900\t1000\t15\t-\t.\tID=test1.4-exon2;Name=test1.4--test1.5--test1.6-exon2;Parent=test1.4
chr1\tS-MART\texon\t1100\t1200\t15\t-\t.\tID=test1.4-exon3;Name=test1.4--test1.5--test1.6-exon3;Parent=test1.4
""",
            oneStrand=True)

    def test_diff_distance(self):
        """With setMaxDistance(200), the 600 -> 1000 gap is expected to split similar scores in two clusters."""
        self._runDiffAndCheck(
            """chr1\tS-MART\ttest1.1\t100\t200\t.\t+\t.\tName=test1.1;score=10;ID=test1.1
chr1\tS-MART\ttest1.2\t300\t400\t.\t+\t.\tName=test1.2;score=15;ID=test1.2
chr1\tS-MART\ttest1.3\t500\t600\t.\t+\t.\tName=test1.3;score=15;ID=test1.3
chr1\tS-MART\ttest1.4\t1000\t1100\t.\t+\t.\tName=test1.4;score=10;ID=test1.4
chr1\tS-MART\ttest1.5\t1200\t1300\t.\t+\t.\tName=test1.5;score=15;ID=test1.5
chr1\tS-MART\ttest1.6\t1400\t1500\t.\t+\t.\tName=test1.6;score=15;ID=test1.6
""",
            """chr1\tS-MART\ttest1.1\t100\t600\t40\t+\t.\tnbElements=3.000000;ID=test1.1;Name=test1.1--test1.2--test1.3
chr1\tS-MART\texon\t100\t200\t10\t+\t.\tID=test1.1-exon1;Name=test1.1--test1.2--test1.3-exon1;Parent=test1.1
chr1\tS-MART\texon\t300\t400\t15\t+\t.\tID=test1.1-exon2;Name=test1.1--test1.2--test1.3-exon2;Parent=test1.1
chr1\tS-MART\texon\t500\t600\t15\t+\t.\tID=test1.1-exon3;Name=test1.1--test1.2--test1.3-exon3;Parent=test1.1
chr1\tS-MART\ttest1.4\t1000\t1500\t40\t+\t.\tnbElements=3.000000;ID=test1.4;Name=test1.4--test1.5--test1.6
chr1\tS-MART\texon\t1000\t1100\t10\t+\t.\tID=test1.4-exon1;Name=test1.4--test1.5--test1.6-exon1;Parent=test1.4
chr1\tS-MART\texon\t1200\t1300\t15\t+\t.\tID=test1.4-exon2;Name=test1.4--test1.5--test1.6-exon2;Parent=test1.4
chr1\tS-MART\texon\t1400\t1500\t15\t+\t.\tID=test1.4-exon3;Name=test1.4--test1.5--test1.6-exon3;Parent=test1.4
""",
            maxDistance=200)


# Run the tests of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()